diff --git a/velox/duckdb/README.md b/velox/duckdb/README.md index 52f7273d8944..0302b7bbf903 100644 --- a/velox/duckdb/README.md +++ b/velox/duckdb/README.md @@ -9,13 +9,11 @@ a new feature, first clone DuckDB git repository: Then generate the amalgamated .cpp and .hpp files: python3 scripts/amalgamation.py --extended --splits=8 - python3 scripts/parquet_amalgamation.py Then copy the generated files to velox/external/duckdb: export VELOX_PATH="" rsync -vrh src/amalgamation/duckdb* ${VELOX_PATH}/velox/external/duckdb/ - rsync -vrh src/amalgamation/parquet* ${VELOX_PATH}/velox/external/duckdb/ We also maintain a copy of TPC-H dataset generators that need to be updated: diff --git a/velox/dwio/common/tests/BitPackDecoderBenchmark.cpp b/velox/dwio/common/tests/BitPackDecoderBenchmark.cpp index 162f30fa9d26..62d847fd99ec 100644 --- a/velox/dwio/common/tests/BitPackDecoderBenchmark.cpp +++ b/velox/dwio/common/tests/BitPackDecoderBenchmark.cpp @@ -23,13 +23,12 @@ #endif #include "velox/external/duckdb/duckdb-fastpforlib.hpp" -#include "velox/external/duckdb/parquet-amalgamation.hpp" -#include "velox/vector/TypeAliases.h" #include #include #include #include +#include "velox/external/duckdb/duckdb.hpp" using namespace folly; using namespace facebook::velox; @@ -38,6 +37,187 @@ using RowSet = folly::Range; static const uint64_t kNumValues = 1024768 * 8; +namespace duckdb { + +class ByteBuffer { // on to the 10 thousandth impl + public: + ByteBuffer() {} + ByteBuffer(char* ptr, uint64_t len) : ptr(ptr), len(len) {} + + char* ptr = nullptr; + uint64_t len = 0; + + public: + void inc(uint64_t increment) { + available(increment); + len -= increment; + ptr += increment; + } + + template + T read() { + T val = get(); + inc(sizeof(T)); + return val; + } + + template + T get() { + available(sizeof(T)); + T val = Load((data_ptr_t)ptr); + return val; + } + + void copy_to(char* dest, uint64_t len) { + available(len); + std::memcpy(dest, ptr, len); + } + + void zero() { + std::memset(ptr, 0, len); + } + + void available(uint64_t req_len) { + if (req_len > len) { + throw std::runtime_error("Out of buffer"); + } + } +}; + +class ParquetDecodeUtils { + public: + template + static T ZigzagToInt(const T& n) { + return (n >> 1) ^ -(n & 1); + } + + static const uint64_t BITPACK_MASKS[]; + static const uint64_t BITPACK_MASKS_SIZE; + static const uint8_t BITPACK_DLEN; + + template + static uint32_t BitUnpack( + ByteBuffer& buffer, + uint8_t& bitpack_pos, + T* dest, + uint32_t count, + uint8_t width) { + if (width >= ParquetDecodeUtils::BITPACK_MASKS_SIZE) { + throw InvalidInputException( + "The width (%d) of the bitpacked data exceeds the supported max width (%d), " + "the file might be corrupted.", + width, + ParquetDecodeUtils::BITPACK_MASKS_SIZE); + } + auto mask = BITPACK_MASKS[width]; + + for (uint32_t i = 0; i < count; i++) { + T val = (buffer.get() >> bitpack_pos) & mask; + bitpack_pos += width; + while (bitpack_pos > BITPACK_DLEN) { + buffer.inc(1); + val |= (T(buffer.get()) + << T(BITPACK_DLEN - (bitpack_pos - width))) & + mask; + bitpack_pos -= BITPACK_DLEN; + } + dest[i] = val; + } + return count; + } + + template + static T VarintDecode(ByteBuffer& buf) { + T result = 0; + uint8_t shift = 0; + while (true) { + auto byte = buf.read(); + result |= T(byte & 127) << shift; + if ((byte & 128) == 0) { + break; + } + shift += 7; + if (shift > sizeof(T) * 8) { + throw std::runtime_error("Varint-decoding found too large number"); + } + } + return result; + } +}; +} // namespace duckdb + +const uint64_t duckdb::ParquetDecodeUtils::BITPACK_MASKS[] = { + 0, + 1, + 3, + 7, + 15, + 31, + 63, + 127, + 255, + 511, + 1023, + 2047, + 4095, + 8191, + 16383, + 32767, + 65535, + 131071, + 262143, + 524287, + 1048575, + 2097151, + 4194303, + 8388607, + 16777215, + 33554431, + 67108863, + 134217727, + 268435455, + 536870911, + 1073741823, + 2147483647, + 4294967295, + 8589934591, + 17179869183, + 34359738367, + 68719476735, + 137438953471, + 274877906943, + 549755813887, + 1099511627775, + 2199023255551, + 4398046511103, + 8796093022207, + 17592186044415, + 35184372088831, + 70368744177663, + 140737488355327, + 281474976710655, + 562949953421311, + 1125899906842623, + 2251799813685247, + 4503599627370495, + 9007199254740991, + 18014398509481983, + 36028797018963967, + 72057594037927935, + 144115188075855871, + 288230376151711743, + 576460752303423487, + 1152921504606846975, + 2305843009213693951, + 4611686018427387903, + 9223372036854775807, + 18446744073709551615ULL}; + +const uint64_t duckdb::ParquetDecodeUtils::BITPACK_MASKS_SIZE = + sizeof(ParquetDecodeUtils::BITPACK_MASKS) / sizeof(uint64_t); + +const uint8_t duckdb::ParquetDecodeUtils::BITPACK_DLEN = 8; + // Array of bit packed representations of randomInts_u32. The array at index i // is packed i bits wide and the values come from the low bits of std::vector> bitPackedData; diff --git a/velox/external/duckdb/CMakeLists.txt b/velox/external/duckdb/CMakeLists.txt index c65c3d33c7da..fac01e1d2db6 100644 --- a/velox/external/duckdb/CMakeLists.txt +++ b/velox/external/duckdb/CMakeLists.txt @@ -12,7 +12,6 @@ add_subdirectory(tpch) add_compile_definitions(DISABLE_DUCKDB_REMOTE_INSTALL) -add_compile_definitions(BUILD_PARQUET_EXTENSION) add_library( duckdb @@ -30,8 +29,7 @@ add_library( duckdb-libpg_query.cpp duckdb-miniz.cpp duckdb-re2.cpp - duckdb-utf8proc.cpp - parquet-amalgamation.cpp) + duckdb-utf8proc.cpp) set_property(TARGET duckdb PROPERTY JOB_POOL_COMPILE high_memory_pool) diff --git a/velox/external/duckdb/parquet-amalgamation.cpp b/velox/external/duckdb/parquet-amalgamation.cpp deleted file mode 100644 index 8c4b6662d4d2..000000000000 --- a/velox/external/duckdb/parquet-amalgamation.cpp +++ /dev/null @@ -1,43608 +0,0 @@ -#include "duckdb.hpp" -#ifdef DUCKDB_AMALGAMATION -#ifndef DUCKDB_AMALGAMATION_EXTENDED -#error Parquet amalgamation requires extended DuckDB amalgamation (--extended) -#endif -#endif -#include "parquet-amalgamation.hpp" - - - -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #3 -// See the end of this file for a list - -// Copyright 2011 Google Inc. All Rights Reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#include - - - -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #3 -// See the end of this file for a list - -// Copyright 2011 Google Inc. All Rights Reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#ifndef THIRD_PARTY_SNAPPY_SNAPPY_SINKSOURCE_H_ -#define THIRD_PARTY_SNAPPY_SNAPPY_SINKSOURCE_H_ - -#include - -namespace duckdb_snappy { - -// A Sink is an interface that consumes a sequence of bytes. -class Sink { - public: - Sink() { } - virtual ~Sink(); - - // Append "bytes[0,n-1]" to this. - virtual void Append(const char* bytes, size_t n) = 0; - - // Returns a writable buffer of the specified length for appending. - // May return a pointer to the caller-owned scratch buffer which - // must have at least the indicated length. The returned buffer is - // only valid until the next operation on this Sink. - // - // After writing at most "length" bytes, call Append() with the - // pointer returned from this function and the number of bytes - // written. Many Append() implementations will avoid copying - // bytes if this function returned an internal buffer. - // - // If a non-scratch buffer is returned, the caller may only pass a - // prefix of it to Append(). That is, it is not correct to pass an - // interior pointer of the returned array to Append(). - // - // The default implementation always returns the scratch buffer. - virtual char* GetAppendBuffer(size_t length, char* scratch); - - // For higher performance, Sink implementations can provide custom - // AppendAndTakeOwnership() and GetAppendBufferVariable() methods. - // These methods can reduce the number of copies done during - // compression/decompression. - - // Append "bytes[0,n-1] to the sink. Takes ownership of "bytes" - // and calls the deleter function as (*deleter)(deleter_arg, bytes, n) - // to free the buffer. deleter function must be non NULL. - // - // The default implementation just calls Append and frees "bytes". - // Other implementations may avoid a copy while appending the buffer. - virtual void AppendAndTakeOwnership( - char* bytes, size_t n, void (*deleter)(void*, const char*, size_t), - void *deleter_arg); - - // Returns a writable buffer for appending and writes the buffer's capacity to - // *allocated_size. Guarantees *allocated_size >= min_size. - // May return a pointer to the caller-owned scratch buffer which must have - // scratch_size >= min_size. - // - // The returned buffer is only valid until the next operation - // on this ByteSink. - // - // After writing at most *allocated_size bytes, call Append() with the - // pointer returned from this function and the number of bytes written. - // Many Append() implementations will avoid copying bytes if this function - // returned an internal buffer. - // - // If the sink implementation allocates or reallocates an internal buffer, - // it should use the desired_size_hint if appropriate. If a caller cannot - // provide a reasonable guess at the desired capacity, it should set - // desired_size_hint = 0. - // - // If a non-scratch buffer is returned, the caller may only pass - // a prefix to it to Append(). That is, it is not correct to pass an - // interior pointer to Append(). - // - // The default implementation always returns the scratch buffer. - virtual char* GetAppendBufferVariable( - size_t min_size, size_t desired_size_hint, char* scratch, - size_t scratch_size, size_t* allocated_size); - - private: - // No copying - Sink(const Sink&); - void operator=(const Sink&); -}; - -// A Source is an interface that yields a sequence of bytes -class Source { - public: - Source() { } - virtual ~Source(); - - // Return the number of bytes left to read from the source - virtual size_t Available() const = 0; - - // Peek at the next flat region of the source. Does not reposition - // the source. The returned region is empty iff Available()==0. - // - // Returns a pointer to the beginning of the region and store its - // length in *len. - // - // The returned region is valid until the next call to Skip() or - // until this object is destroyed, whichever occurs first. - // - // The returned region may be larger than Available() (for example - // if this ByteSource is a view on a substring of a larger source). - // The caller is responsible for ensuring that it only reads the - // Available() bytes. - virtual const char* Peek(size_t* len) = 0; - - // Skip the next n bytes. Invalidates any buffer returned by - // a previous call to Peek(). - // REQUIRES: Available() >= n - virtual void Skip(size_t n) = 0; - - private: - // No copying - Source(const Source&); - void operator=(const Source&); -}; - -// A Source implementation that yields the contents of a flat array -class ByteArraySource : public Source { - public: - ByteArraySource(const char* p, size_t n) : ptr_(p), left_(n) { } - virtual ~ByteArraySource(); - virtual size_t Available() const; - virtual const char* Peek(size_t* len); - virtual void Skip(size_t n); - private: - const char* ptr_; - size_t left_; -}; - -// A Sink implementation that writes to a flat array without any bound checks. -class UncheckedByteArraySink : public Sink { - public: - explicit UncheckedByteArraySink(char* dest) : dest_(dest) { } - virtual ~UncheckedByteArraySink(); - virtual void Append(const char* data, size_t n); - virtual char* GetAppendBuffer(size_t len, char* scratch); - virtual char* GetAppendBufferVariable( - size_t min_size, size_t desired_size_hint, char* scratch, - size_t scratch_size, size_t* allocated_size); - virtual void AppendAndTakeOwnership( - char* bytes, size_t n, void (*deleter)(void*, const char*, size_t), - void *deleter_arg); - - // Return the current output pointer so that a caller can see how - // many bytes were produced. - // Note: this is not a Sink method. - char* CurrentDestination() const { return dest_; } - private: - char* dest_; -}; - -} // namespace duckdb_snappy - -#endif // THIRD_PARTY_SNAPPY_SNAPPY_SINKSOURCE_H_ - - -// LICENSE_CHANGE_END - - -namespace duckdb_snappy { - -Source::~Source() { } - -Sink::~Sink() { } - -char* Sink::GetAppendBuffer(size_t length, char* scratch) { - return scratch; -} - -char* Sink::GetAppendBufferVariable( - size_t min_size, size_t desired_size_hint, char* scratch, - size_t scratch_size, size_t* allocated_size) { - *allocated_size = scratch_size; - return scratch; -} - -void Sink::AppendAndTakeOwnership( - char* bytes, size_t n, - void (*deleter)(void*, const char*, size_t), - void *deleter_arg) { - Append(bytes, n); - (*deleter)(deleter_arg, bytes, n); -} - -ByteArraySource::~ByteArraySource() { } - -size_t ByteArraySource::Available() const { return left_; } - -const char* ByteArraySource::Peek(size_t* len) { - *len = left_; - return ptr_; -} - -void ByteArraySource::Skip(size_t n) { - left_ -= n; - ptr_ += n; -} - -UncheckedByteArraySink::~UncheckedByteArraySink() { } - -void UncheckedByteArraySink::Append(const char* data, size_t n) { - // Do no copying if the caller filled in the result of GetAppendBuffer() - if (data != dest_) { - memcpy(dest_, data, n); - } - dest_ += n; -} - -char* UncheckedByteArraySink::GetAppendBuffer(size_t len, char* scratch) { - return dest_; -} - -void UncheckedByteArraySink::AppendAndTakeOwnership( - char* data, size_t n, - void (*deleter)(void*, const char*, size_t), - void *deleter_arg) { - if (data != dest_) { - memcpy(dest_, data, n); - (*deleter)(deleter_arg, data, n); - } - dest_ += n; -} - -char* UncheckedByteArraySink::GetAppendBufferVariable( - size_t min_size, size_t desired_size_hint, char* scratch, - size_t scratch_size, size_t* allocated_size) { - *allocated_size = desired_size_hint; - return dest_; -} - -} // namespace duckdb_snappy - - -// LICENSE_CHANGE_END - - -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #3 -// See the end of this file for a list - -// Copyright 2011 Google Inc. All Rights Reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#include -#include - - - -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #3 -// See the end of this file for a list - -// Copyright 2011 Google Inc. All Rights Reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Various stubs for the open-source version of Snappy. - -#ifndef THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_STUBS_INTERNAL_H_ -#define THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_STUBS_INTERNAL_H_ - -// #ifdef HAVE_CONFIG_H -// #include "config.h" -// #endif - -#include - -#include -#include -#include - -#ifdef HAVE_SYS_MMAN_H -#include -#endif - -#ifdef HAVE_UNISTD_H -#include -#endif - -#if defined(_MSC_VER) -#include -#endif // defined(_MSC_VER) - -#ifndef __has_feature -#define __has_feature(x) 0 -#endif - -#if __has_feature(memory_sanitizer) -#include -#define SNAPPY_ANNOTATE_MEMORY_IS_INITIALIZED(address, size) \ - __msan_unpoison((address), (size)) -#else -#define SNAPPY_ANNOTATE_MEMORY_IS_INITIALIZED(address, size) /* empty */ -#endif // __has_feature(memory_sanitizer) - - - -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #3 -// See the end of this file for a list - -// Copyright 2011 Google Inc. All Rights Reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Various type stubs for the open-source version of Snappy. -// -// This file cannot include config.h, as it is included from snappy.h, -// which is a public header. Instead, snappy-stubs-public.h is generated by -// from snappy-stubs-public.h.in at configure time. - -#ifndef THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_STUBS_PUBLIC_H_ -#define THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_STUBS_PUBLIC_H_ - -#include -#include -#include - -#ifndef _WIN32 // HAVE_SYS_UIO_H -#include -#endif // HAVE_SYS_UIO_H - -#define SNAPPY_MAJOR 1 -#define SNAPPY_MINOR 1 -#define SNAPPY_PATCHLEVEL 7 -#define SNAPPY_VERSION \ - ((SNAPPY_MAJOR << 16) | (SNAPPY_MINOR << 8) | SNAPPY_PATCHLEVEL) - -namespace duckdb_snappy { - -using int8 = std::int8_t; -using uint8 = std::uint8_t; -using int16 = std::int16_t; -using uint16 = std::uint16_t; -using int32 = std::int32_t; -using uint32 = std::uint32_t; -using int64 = std::int64_t; -using uint64 = std::uint64_t; - -using string = std::string; - -#ifdef _WIN32 // !HAVE_SYS_UIO_H -// Windows does not have an iovec type, yet the concept is universally useful. -// It is simple to define it ourselves, so we put it inside our own namespace. -struct iovec { - void* iov_base; - size_t iov_len; -}; -#endif // !HAVE_SYS_UIO_H - -} // namespace duckdb_snappy - -#endif // THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_STUBS_PUBLIC_H_ - - -// LICENSE_CHANGE_END - - -#if defined(__x86_64__) - -// Enable 64-bit optimized versions of some routines. -#define ARCH_K8 1 - -#elif defined(__ppc64__) - -#define ARCH_PPC 1 - -#elif defined(__aarch64__) - -#define ARCH_ARM 1 - -#endif - -// Needed by OS X, among others. -#ifndef MAP_ANONYMOUS -#define MAP_ANONYMOUS MAP_ANON -#endif - -// The size of an array, if known at compile-time. -// Will give unexpected results if used on a pointer. -// We undefine it first, since some compilers already have a definition. -#ifdef ARRAYSIZE -#undef ARRAYSIZE -#endif -#define ARRAYSIZE(a) (sizeof(a) / sizeof(*(a))) - -// Static prediction hints. -#ifdef HAVE_BUILTIN_EXPECT -#define SNAPPY_PREDICT_FALSE(x) (__builtin_expect(x, 0)) -#define SNAPPY_PREDICT_TRUE(x) (__builtin_expect(!!(x), 1)) -#else -#define SNAPPY_PREDICT_FALSE(x) x -#define SNAPPY_PREDICT_TRUE(x) x -#endif - -// This is only used for recomputing the tag byte table used during -// decompression; for simplicity we just remove it from the open-source -// version (anyone who wants to regenerate it can just do the call -// themselves within main()). -#define DEFINE_bool(flag_name, default_value, description) \ - bool FLAGS_ ## flag_name = default_value -#define DECLARE_bool(flag_name) \ - extern bool FLAGS_ ## flag_name - -namespace duckdb_snappy { - -//static const uint32 kuint32max = static_cast(0xFFFFFFFF); -//static const int64 kint64max = static_cast(0x7FFFFFFFFFFFFFFFLL); - - -// HM: Always use aligned load to keep ourselves out of trouble. Sorry. - -inline uint16 UNALIGNED_LOAD16(const void *p) { - uint16 t; - memcpy(&t, p, sizeof t); - return t; -} - -inline uint32 UNALIGNED_LOAD32(const void *p) { - uint32 t; - memcpy(&t, p, sizeof t); - return t; -} - -inline uint64 UNALIGNED_LOAD64(const void *p) { - uint64 t; - memcpy(&t, p, sizeof t); - return t; -} - -inline void UNALIGNED_STORE16(void *p, uint16 v) { - memcpy(p, &v, sizeof v); -} - -inline void UNALIGNED_STORE32(void *p, uint32 v) { - memcpy(p, &v, sizeof v); -} - -inline void UNALIGNED_STORE64(void *p, uint64 v) { - memcpy(p, &v, sizeof v); -} - - -// The following guarantees declaration of the byte swap functions. -#if defined(SNAPPY_IS_BIG_ENDIAN) - -#ifdef HAVE_SYS_BYTEORDER_H -#include -#endif - -#ifdef HAVE_SYS_ENDIAN_H -#include -#endif - -#ifdef _MSC_VER -#include -#define bswap_16(x) _byteswap_ushort(x) -#define bswap_32(x) _byteswap_ulong(x) -#define bswap_64(x) _byteswap_uint64(x) - -#elif defined(__APPLE__) -// Mac OS X / Darwin features -#include -#define bswap_16(x) OSSwapInt16(x) -#define bswap_32(x) OSSwapInt32(x) -#define bswap_64(x) OSSwapInt64(x) - -#elif defined(HAVE_BYTESWAP_H) -#include - -#elif defined(bswap32) -// FreeBSD defines bswap{16,32,64} in (already #included). -#define bswap_16(x) bswap16(x) -#define bswap_32(x) bswap32(x) -#define bswap_64(x) bswap64(x) - -#elif defined(BSWAP_64) -// Solaris 10 defines BSWAP_{16,32,64} in (already #included). -#define bswap_16(x) BSWAP_16(x) -#define bswap_32(x) BSWAP_32(x) -#define bswap_64(x) BSWAP_64(x) - -#else - -inline uint16 bswap_16(uint16 x) { - return (x << 8) | (x >> 8); -} - -inline uint32 bswap_32(uint32 x) { - x = ((x & 0xff00ff00UL) >> 8) | ((x & 0x00ff00ffUL) << 8); - return (x >> 16) | (x << 16); -} - -inline uint64 bswap_64(uint64 x) { - x = ((x & 0xff00ff00ff00ff00ULL) >> 8) | ((x & 0x00ff00ff00ff00ffULL) << 8); - x = ((x & 0xffff0000ffff0000ULL) >> 16) | ((x & 0x0000ffff0000ffffULL) << 16); - return (x >> 32) | (x << 32); -} - -#endif - -#endif // defined(SNAPPY_IS_BIG_ENDIAN) - -// Convert to little-endian storage, opposite of network format. -// Convert x from host to little endian: x = LittleEndian.FromHost(x); -// convert x from little endian to host: x = LittleEndian.ToHost(x); -// -// Store values into unaligned memory converting to little endian order: -// LittleEndian.Store16(p, x); -// -// Load unaligned values stored in little endian converting to host order: -// x = LittleEndian.Load16(p); -class LittleEndian { - public: - // Conversion functions. -#if defined(SNAPPY_IS_BIG_ENDIAN) - - static uint16 FromHost16(uint16 x) { return bswap_16(x); } - static uint16 ToHost16(uint16 x) { return bswap_16(x); } - - static uint32 FromHost32(uint32 x) { return bswap_32(x); } - static uint32 ToHost32(uint32 x) { return bswap_32(x); } - - static bool IsLittleEndian() { return false; } - -#else // !defined(SNAPPY_IS_BIG_ENDIAN) - - static uint16 FromHost16(uint16 x) { return x; } - static uint16 ToHost16(uint16 x) { return x; } - - static uint32 FromHost32(uint32 x) { return x; } - static uint32 ToHost32(uint32 x) { return x; } - - static bool IsLittleEndian() { return true; } - -#endif // !defined(SNAPPY_IS_BIG_ENDIAN) - - // Functions to do unaligned loads and stores in little-endian order. - static uint16 Load16(const void *p) { - return ToHost16(UNALIGNED_LOAD16(p)); - } - - static void Store16(void *p, uint16 v) { - UNALIGNED_STORE16(p, FromHost16(v)); - } - - static uint32 Load32(const void *p) { - return ToHost32(UNALIGNED_LOAD32(p)); - } - - static void Store32(void *p, uint32 v) { - UNALIGNED_STORE32(p, FromHost32(v)); - } -}; - -// Some bit-manipulation functions. -class Bits { - public: - // Return floor(log2(n)) for positive integer n. - static int Log2FloorNonZero(uint32 n); - - // Return floor(log2(n)) for positive integer n. Returns -1 iff n == 0. - static int Log2Floor(uint32 n); - - // Return the first set least / most significant bit, 0-indexed. Returns an - // undefined value if n == 0. FindLSBSetNonZero() is similar to ffs() except - // that it's 0-indexed. - static int FindLSBSetNonZero(uint32 n); - -#if defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM) - static int FindLSBSetNonZero64(uint64 n); -#endif // defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM) - - private: - // No copying - Bits(const Bits&); - void operator=(const Bits&); -}; - -#ifdef HAVE_BUILTIN_CTZ - -inline int Bits::Log2FloorNonZero(uint32 n) { - assert(n != 0); - // (31 ^ x) is equivalent to (31 - x) for x in [0, 31]. An easy proof - // represents subtraction in base 2 and observes that there's no carry. - // - // GCC and Clang represent __builtin_clz on x86 as 31 ^ _bit_scan_reverse(x). - // Using "31 ^" here instead of "31 -" allows the optimizer to strip the - // function body down to _bit_scan_reverse(x). - return 31 ^ __builtin_clz(n); -} - -inline int Bits::Log2Floor(uint32 n) { - return (n == 0) ? -1 : Bits::Log2FloorNonZero(n); -} - -inline int Bits::FindLSBSetNonZero(uint32 n) { - assert(n != 0); - return __builtin_ctz(n); -} - -#if defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM) -inline int Bits::FindLSBSetNonZero64(uint64 n) { - assert(n != 0); - return __builtin_ctzll(n); -} -#endif // defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM) - -#elif defined(_MSC_VER) - -inline int Bits::Log2FloorNonZero(uint32 n) { - assert(n != 0); - unsigned long where; - _BitScanReverse(&where, n); - return static_cast(where); -} - -inline int Bits::Log2Floor(uint32 n) { - unsigned long where; - if (_BitScanReverse(&where, n)) - return static_cast(where); - return -1; -} - -inline int Bits::FindLSBSetNonZero(uint32 n) { - assert(n != 0); - unsigned long where; - if (_BitScanForward(&where, n)) - return static_cast(where); - return 32; -} - -#if defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM) -inline int Bits::FindLSBSetNonZero64(uint64 n) { - assert(n != 0); - unsigned long where; - if (_BitScanForward64(&where, n)) - return static_cast(where); - return 64; -} -#endif // defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM) - -#else // Portable versions. - -inline int Bits::Log2FloorNonZero(uint32 n) { - assert(n != 0); - - int log = 0; - uint32 value = n; - for (int i = 4; i >= 0; --i) { - int shift = (1 << i); - uint32 x = value >> shift; - if (x != 0) { - value = x; - log += shift; - } - } - assert(value == 1); - return log; -} - -inline int Bits::Log2Floor(uint32 n) { - return (n == 0) ? -1 : Bits::Log2FloorNonZero(n); -} - -inline int Bits::FindLSBSetNonZero(uint32 n) { - assert(n != 0); - - int rc = 31; - for (int i = 4, shift = 1 << 4; i >= 0; --i) { - const uint32 x = n << shift; - if (x != 0) { - n = x; - rc -= shift; - } - shift >>= 1; - } - return rc; -} - -#if defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM) -// FindLSBSetNonZero64() is defined in terms of FindLSBSetNonZero(). -inline int Bits::FindLSBSetNonZero64(uint64 n) { - assert(n != 0); - - const uint32 bottombits = static_cast(n); - if (bottombits == 0) { - // Bottom bits are zero, so scan in top bits - return 32 + FindLSBSetNonZero(static_cast(n >> 32)); - } else { - return FindLSBSetNonZero(bottombits); - } -} -#endif // defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM) - -#endif // End portable versions. - -// Variable-length integer encoding. -class Varint { - public: - // Maximum lengths of varint encoding of uint32. - static const int kMax32 = 5; - - // Attempts to parse a varint32 from a prefix of the bytes in [ptr,limit-1]. - // Never reads a character at or beyond limit. If a valid/terminated varint32 - // was found in the range, stores it in *OUTPUT and returns a pointer just - // past the last byte of the varint32. Else returns NULL. On success, - // "result <= limit". - static const char* Parse32WithLimit(const char* ptr, const char* limit, - uint32* OUTPUT); - - // REQUIRES "ptr" points to a buffer of length sufficient to hold "v". - // EFFECTS Encodes "v" into "ptr" and returns a pointer to the - // byte just past the last encoded byte. - static char* Encode32(char* ptr, uint32 v); - - // EFFECTS Appends the varint representation of "value" to "*s". - static void Append32(string* s, uint32 value); -}; - -inline const char* Varint::Parse32WithLimit(const char* p, - const char* l, - uint32* OUTPUT) { - const unsigned char* ptr = reinterpret_cast(p); - const unsigned char* limit = reinterpret_cast(l); - uint32 b, result; - if (ptr >= limit) return NULL; - b = *(ptr++); result = b & 127; if (b < 128) goto done; - if (ptr >= limit) return NULL; - b = *(ptr++); result |= (b & 127) << 7; if (b < 128) goto done; - if (ptr >= limit) return NULL; - b = *(ptr++); result |= (b & 127) << 14; if (b < 128) goto done; - if (ptr >= limit) return NULL; - b = *(ptr++); result |= (b & 127) << 21; if (b < 128) goto done; - if (ptr >= limit) return NULL; - b = *(ptr++); result |= (b & 127) << 28; if (b < 16) goto done; - return NULL; // Value is too long to be a varint32 - done: - *OUTPUT = result; - return reinterpret_cast(ptr); -} - -inline char* Varint::Encode32(char* sptr, uint32 v) { - // Operate on characters as unsigneds - unsigned char* ptr = reinterpret_cast(sptr); - static const int B = 128; - if (v < (1<<7)) { - *(ptr++) = v; - } else if (v < (1<<14)) { - *(ptr++) = v | B; - *(ptr++) = v>>7; - } else if (v < (1<<21)) { - *(ptr++) = v | B; - *(ptr++) = (v>>7) | B; - *(ptr++) = v>>14; - } else if (v < (1<<28)) { - *(ptr++) = v | B; - *(ptr++) = (v>>7) | B; - *(ptr++) = (v>>14) | B; - *(ptr++) = v>>21; - } else { - *(ptr++) = v | B; - *(ptr++) = (v>>7) | B; - *(ptr++) = (v>>14) | B; - *(ptr++) = (v>>21) | B; - *(ptr++) = v>>28; - } - return reinterpret_cast(ptr); -} - -// If you know the internal layout of the std::string in use, you can -// replace this function with one that resizes the string without -// filling the new space with zeros (if applicable) -- -// it will be non-portable but faster. -inline void STLStringResizeUninitialized(string* s, size_t new_size) { - s->resize(new_size); -} - -// Return a mutable char* pointing to a string's internal buffer, -// which may not be null-terminated. Writing through this pointer will -// modify the string. -// -// string_as_array(&str)[i] is valid for 0 <= i < str.size() until the -// next call to a string method that invalidates iterators. -// -// As of 2006-04, there is no standard-blessed way of getting a -// mutable reference to a string's internal buffer. However, issue 530 -// (http://www.open-std.org/JTC1/SC22/WG21/docs/lwg-defects.html#530) -// proposes this as the method. It will officially be part of the standard -// for C++0x. This should already work on all current implementations. -inline char* string_as_array(string* str) { - return str->empty() ? NULL : &*str->begin(); -} - -} // namespace duckdb_snappy - -#endif // THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_STUBS_INTERNAL_H_ - - -// LICENSE_CHANGE_END - - -namespace duckdb_snappy { - -void Varint::Append32(string* s, uint32 value) { - char buf[Varint::kMax32]; - const char* p = Varint::Encode32(buf, value); - s->append(buf, p - buf); -} - -} // namespace duckdb_snappy - - -// LICENSE_CHANGE_END - - -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #3 -// See the end of this file for a list - -// Copyright 2005 Google Inc. All Rights Reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - - -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #3 -// See the end of this file for a list - -// Copyright 2005 and onwards Google Inc. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// A light-weight compression algorithm. It is designed for speed of -// compression and decompression, rather than for the utmost in space -// savings. -// -// For getting better compression ratios when you are compressing data -// with long repeated sequences or compressing data that is similar to -// other data, while still compressing fast, you might look at first -// using BMDiff and then compressing the output of BMDiff with -// Snappy. - -#ifndef THIRD_PARTY_SNAPPY_SNAPPY_H__ -#define THIRD_PARTY_SNAPPY_SNAPPY_H__ - -#include -#include - - - -namespace duckdb_snappy { - class Source; - class Sink; - - // ------------------------------------------------------------------------ - // Generic compression/decompression routines. - // ------------------------------------------------------------------------ - - // Compress the bytes read from "*source" and append to "*sink". Return the - // number of bytes written. - size_t Compress(Source* source, Sink* sink); - - // Find the uncompressed length of the given stream, as given by the header. - // Note that the true length could deviate from this; the stream could e.g. - // be truncated. - // - // Also note that this leaves "*source" in a state that is unsuitable for - // further operations, such as RawUncompress(). You will need to rewind - // or recreate the source yourself before attempting any further calls. - bool GetUncompressedLength(Source* source, uint32* result); - - // ------------------------------------------------------------------------ - // Higher-level string based routines (should be sufficient for most users) - // ------------------------------------------------------------------------ - - // Sets "*output" to the compressed version of "input[0,input_length-1]". - // Original contents of *output are lost. - // - // REQUIRES: "input[]" is not an alias of "*output". - size_t Compress(const char* input, size_t input_length, string* output); - - // Decompresses "compressed[0,compressed_length-1]" to "*uncompressed". - // Original contents of "*uncompressed" are lost. - // - // REQUIRES: "compressed[]" is not an alias of "*uncompressed". - // - // returns false if the message is corrupted and could not be decompressed - bool Uncompress(const char* compressed, size_t compressed_length, - string* uncompressed); - - // Decompresses "compressed" to "*uncompressed". - // - // returns false if the message is corrupted and could not be decompressed - bool Uncompress(Source* compressed, Sink* uncompressed); - - // This routine uncompresses as much of the "compressed" as possible - // into sink. It returns the number of valid bytes added to sink - // (extra invalid bytes may have been added due to errors; the caller - // should ignore those). The emitted data typically has length - // GetUncompressedLength(), but may be shorter if an error is - // encountered. - size_t UncompressAsMuchAsPossible(Source* compressed, Sink* uncompressed); - - // ------------------------------------------------------------------------ - // Lower-level character array based routines. May be useful for - // efficiency reasons in certain circumstances. - // ------------------------------------------------------------------------ - - // REQUIRES: "compressed" must point to an area of memory that is at - // least "MaxCompressedLength(input_length)" bytes in length. - // - // Takes the data stored in "input[0..input_length]" and stores - // it in the array pointed to by "compressed". - // - // "*compressed_length" is set to the length of the compressed output. - // - // Example: - // char* output = new char[snappy::MaxCompressedLength(input_length)]; - // size_t output_length; - // RawCompress(input, input_length, output, &output_length); - // ... Process(output, output_length) ... - // delete [] output; - void RawCompress(const char* input, - size_t input_length, - char* compressed, - size_t* compressed_length); - - // Given data in "compressed[0..compressed_length-1]" generated by - // calling the Snappy::Compress routine, this routine - // stores the uncompressed data to - // uncompressed[0..GetUncompressedLength(compressed)-1] - // returns false if the message is corrupted and could not be decrypted - bool RawUncompress(const char* compressed, size_t compressed_length, - char* uncompressed); - - // Given data from the byte source 'compressed' generated by calling - // the Snappy::Compress routine, this routine stores the uncompressed - // data to - // uncompressed[0..GetUncompressedLength(compressed,compressed_length)-1] - // returns false if the message is corrupted and could not be decrypted - bool RawUncompress(Source* compressed, char* uncompressed); - - // Given data in "compressed[0..compressed_length-1]" generated by - // calling the Snappy::Compress routine, this routine - // stores the uncompressed data to the iovec "iov". The number of physical - // buffers in "iov" is given by iov_cnt and their cumulative size - // must be at least GetUncompressedLength(compressed). The individual buffers - // in "iov" must not overlap with each other. - // - // returns false if the message is corrupted and could not be decrypted - bool RawUncompressToIOVec(const char* compressed, size_t compressed_length, - const struct iovec* iov, size_t iov_cnt); - - // Given data from the byte source 'compressed' generated by calling - // the Snappy::Compress routine, this routine stores the uncompressed - // data to the iovec "iov". The number of physical - // buffers in "iov" is given by iov_cnt and their cumulative size - // must be at least GetUncompressedLength(compressed). The individual buffers - // in "iov" must not overlap with each other. - // - // returns false if the message is corrupted and could not be decrypted - bool RawUncompressToIOVec(Source* compressed, const struct iovec* iov, - size_t iov_cnt); - - // Returns the maximal size of the compressed representation of - // input data that is "source_bytes" bytes in length; - size_t MaxCompressedLength(size_t source_bytes); - - // REQUIRES: "compressed[]" was produced by RawCompress() or Compress() - // Returns true and stores the length of the uncompressed data in - // *result normally. Returns false on parsing error. - // This operation takes O(1) time. - bool GetUncompressedLength(const char* compressed, size_t compressed_length, - size_t* result); - - // Returns true iff the contents of "compressed[]" can be uncompressed - // successfully. Does not return the uncompressed data. Takes - // time proportional to compressed_length, but is usually at least - // a factor of four faster than actual decompression. - bool IsValidCompressedBuffer(const char* compressed, - size_t compressed_length); - - // Returns true iff the contents of "compressed" can be uncompressed - // successfully. Does not return the uncompressed data. Takes - // time proportional to *compressed length, but is usually at least - // a factor of four faster than actual decompression. - // On success, consumes all of *compressed. On failure, consumes an - // unspecified prefix of *compressed. - bool IsValidCompressed(Source* compressed); - -} // end namespace duckdb_snappy - -#endif // THIRD_PARTY_SNAPPY_SNAPPY_H__ - - -// LICENSE_CHANGE_END - - - -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #3 -// See the end of this file for a list - -// Copyright 2008 Google Inc. All Rights Reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Internals shared between the Snappy implementation and its unittest. - -#ifndef THIRD_PARTY_SNAPPY_SNAPPY_INTERNAL_H_ -#define THIRD_PARTY_SNAPPY_SNAPPY_INTERNAL_H_ - - - -namespace duckdb_snappy { -namespace internal { - -// Working memory performs a single allocation to hold all scratch space -// required for compression. -class WorkingMemory { - public: - explicit WorkingMemory(size_t input_size); - ~WorkingMemory(); - - // Allocates and clears a hash table using memory in "*this", - // stores the number of buckets in "*table_size" and returns a pointer to - // the base of the hash table. - uint16* GetHashTable(size_t fragment_size, int* table_size) const; - char* GetScratchInput() const { return input_; } - char* GetScratchOutput() const { return output_; } - - private: - char* mem_; // the allocated memory, never nullptr - size_t size_; // the size of the allocated memory, never 0 - uint16* table_; // the pointer to the hashtable - char* input_; // the pointer to the input scratch buffer - char* output_; // the pointer to the output scratch buffer - - // No copying - WorkingMemory(const WorkingMemory&); - void operator=(const WorkingMemory&); -}; - -// Flat array compression that does not emit the "uncompressed length" -// prefix. Compresses "input" string to the "*op" buffer. -// -// REQUIRES: "input_length <= kBlockSize" -// REQUIRES: "op" points to an array of memory that is at least -// "MaxCompressedLength(input_length)" in size. -// REQUIRES: All elements in "table[0..table_size-1]" are initialized to zero. -// REQUIRES: "table_size" is a power of two -// -// Returns an "end" pointer into "op" buffer. -// "end - op" is the compressed size of "input". -char* CompressFragment(const char* input, - size_t input_length, - char* op, - uint16* table, - const int table_size); - -// Find the largest n such that -// -// s1[0,n-1] == s2[0,n-1] -// and n <= (s2_limit - s2). -// -// Return make_pair(n, n < 8). -// Does not read *s2_limit or beyond. -// Does not read *(s1 + (s2_limit - s2)) or beyond. -// Requires that s2_limit >= s2. -// -// Separate implementation for 64-bit, little-endian cpus. -#if !defined(SNAPPY_IS_BIG_ENDIAN) && \ - (defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM)) -static inline std::pair FindMatchLength(const char* s1, - const char* s2, - const char* s2_limit) { - assert(s2_limit >= s2); - size_t matched = 0; - - // This block isn't necessary for correctness; we could just start looping - // immediately. As an optimization though, it is useful. It creates some not - // uncommon code paths that determine, without extra effort, whether the match - // length is less than 8. In short, we are hoping to avoid a conditional - // branch, and perhaps get better code layout from the C++ compiler. - if (SNAPPY_PREDICT_TRUE(s2 <= s2_limit - 8)) { - uint64 a1 = UNALIGNED_LOAD64(s1); - uint64 a2 = UNALIGNED_LOAD64(s2); - if (a1 != a2) { - return std::pair(Bits::FindLSBSetNonZero64(a1 ^ a2) >> 3, - true); - } else { - matched = 8; - s2 += 8; - } - } - - // Find out how long the match is. We loop over the data 64 bits at a - // time until we find a 64-bit block that doesn't match; then we find - // the first non-matching bit and use that to calculate the total - // length of the match. - while (SNAPPY_PREDICT_TRUE(s2 <= s2_limit - 8)) { - if (UNALIGNED_LOAD64(s2) == UNALIGNED_LOAD64(s1 + matched)) { - s2 += 8; - matched += 8; - } else { - uint64 x = UNALIGNED_LOAD64(s2) ^ UNALIGNED_LOAD64(s1 + matched); - int matching_bits = Bits::FindLSBSetNonZero64(x); - matched += matching_bits >> 3; - assert(matched >= 8); - return std::pair(matched, false); - } - } - while (SNAPPY_PREDICT_TRUE(s2 < s2_limit)) { - if (s1[matched] == *s2) { - ++s2; - ++matched; - } else { - return std::pair(matched, matched < 8); - } - } - return std::pair(matched, matched < 8); -} -#else -static inline std::pair FindMatchLength(const char* s1, - const char* s2, - const char* s2_limit) { - // Implementation based on the x86-64 version, above. - assert(s2_limit >= s2); - int matched = 0; - - while (s2 <= s2_limit - 4 && - UNALIGNED_LOAD32(s2) == UNALIGNED_LOAD32(s1 + matched)) { - s2 += 4; - matched += 4; - } - if (LittleEndian::IsLittleEndian() && s2 <= s2_limit - 4) { - uint32 x = UNALIGNED_LOAD32(s2) ^ UNALIGNED_LOAD32(s1 + matched); - int matching_bits = Bits::FindLSBSetNonZero(x); - matched += matching_bits >> 3; - } else { - while ((s2 < s2_limit) && (s1[matched] == *s2)) { - ++s2; - ++matched; - } - } - return std::pair(matched, matched < 8); -} -#endif - -// Lookup tables for decompression code. Give --snappy_dump_decompression_table -// to the unit test to recompute char_table. - -enum { - LITERAL = 0, - COPY_1_BYTE_OFFSET = 1, // 3 bit length + 3 bits of offset in opcode - COPY_2_BYTE_OFFSET = 2, - COPY_4_BYTE_OFFSET = 3 -}; -static const int kMaximumTagLength = 5; // COPY_4_BYTE_OFFSET plus the actual offset. - -// Data stored per entry in lookup table: -// Range Bits-used Description -// ------------------------------------ -// 1..64 0..7 Literal/copy length encoded in opcode byte -// 0..7 8..10 Copy offset encoded in opcode byte / 256 -// 0..4 11..13 Extra bytes after opcode -// -// We use eight bits for the length even though 7 would have sufficed -// because of efficiency reasons: -// (1) Extracting a byte is faster than a bit-field -// (2) It properly aligns copy offset so we do not need a <<8 -static const uint16 char_table[256] = { - 0x0001, 0x0804, 0x1001, 0x2001, 0x0002, 0x0805, 0x1002, 0x2002, - 0x0003, 0x0806, 0x1003, 0x2003, 0x0004, 0x0807, 0x1004, 0x2004, - 0x0005, 0x0808, 0x1005, 0x2005, 0x0006, 0x0809, 0x1006, 0x2006, - 0x0007, 0x080a, 0x1007, 0x2007, 0x0008, 0x080b, 0x1008, 0x2008, - 0x0009, 0x0904, 0x1009, 0x2009, 0x000a, 0x0905, 0x100a, 0x200a, - 0x000b, 0x0906, 0x100b, 0x200b, 0x000c, 0x0907, 0x100c, 0x200c, - 0x000d, 0x0908, 0x100d, 0x200d, 0x000e, 0x0909, 0x100e, 0x200e, - 0x000f, 0x090a, 0x100f, 0x200f, 0x0010, 0x090b, 0x1010, 0x2010, - 0x0011, 0x0a04, 0x1011, 0x2011, 0x0012, 0x0a05, 0x1012, 0x2012, - 0x0013, 0x0a06, 0x1013, 0x2013, 0x0014, 0x0a07, 0x1014, 0x2014, - 0x0015, 0x0a08, 0x1015, 0x2015, 0x0016, 0x0a09, 0x1016, 0x2016, - 0x0017, 0x0a0a, 0x1017, 0x2017, 0x0018, 0x0a0b, 0x1018, 0x2018, - 0x0019, 0x0b04, 0x1019, 0x2019, 0x001a, 0x0b05, 0x101a, 0x201a, - 0x001b, 0x0b06, 0x101b, 0x201b, 0x001c, 0x0b07, 0x101c, 0x201c, - 0x001d, 0x0b08, 0x101d, 0x201d, 0x001e, 0x0b09, 0x101e, 0x201e, - 0x001f, 0x0b0a, 0x101f, 0x201f, 0x0020, 0x0b0b, 0x1020, 0x2020, - 0x0021, 0x0c04, 0x1021, 0x2021, 0x0022, 0x0c05, 0x1022, 0x2022, - 0x0023, 0x0c06, 0x1023, 0x2023, 0x0024, 0x0c07, 0x1024, 0x2024, - 0x0025, 0x0c08, 0x1025, 0x2025, 0x0026, 0x0c09, 0x1026, 0x2026, - 0x0027, 0x0c0a, 0x1027, 0x2027, 0x0028, 0x0c0b, 0x1028, 0x2028, - 0x0029, 0x0d04, 0x1029, 0x2029, 0x002a, 0x0d05, 0x102a, 0x202a, - 0x002b, 0x0d06, 0x102b, 0x202b, 0x002c, 0x0d07, 0x102c, 0x202c, - 0x002d, 0x0d08, 0x102d, 0x202d, 0x002e, 0x0d09, 0x102e, 0x202e, - 0x002f, 0x0d0a, 0x102f, 0x202f, 0x0030, 0x0d0b, 0x1030, 0x2030, - 0x0031, 0x0e04, 0x1031, 0x2031, 0x0032, 0x0e05, 0x1032, 0x2032, - 0x0033, 0x0e06, 0x1033, 0x2033, 0x0034, 0x0e07, 0x1034, 0x2034, - 0x0035, 0x0e08, 0x1035, 0x2035, 0x0036, 0x0e09, 0x1036, 0x2036, - 0x0037, 0x0e0a, 0x1037, 0x2037, 0x0038, 0x0e0b, 0x1038, 0x2038, - 0x0039, 0x0f04, 0x1039, 0x2039, 0x003a, 0x0f05, 0x103a, 0x203a, - 0x003b, 0x0f06, 0x103b, 0x203b, 0x003c, 0x0f07, 0x103c, 0x203c, - 0x0801, 0x0f08, 0x103d, 0x203d, 0x1001, 0x0f09, 0x103e, 0x203e, - 0x1801, 0x0f0a, 0x103f, 0x203f, 0x2001, 0x0f0b, 0x1040, 0x2040 -}; - -} // end namespace internal - - -// The size of a compression block. Note that many parts of the compression -// code assumes that kBlockSize <= 65536; in particular, the hash table -// can only store 16-bit offsets, and EmitCopy() also assumes the offset -// is 65535 bytes or less. Note also that if you change this, it will -// affect the framing format (see framing_format.txt). -// -// Note that there might be older data around that is compressed with larger -// block sizes, so the decompression code should not rely on the -// non-existence of long backreferences. -static const int kBlockLog = 16; -static const size_t kBlockSize = 1 << kBlockLog; - -static const int kMaxHashTableBits = 14; -static const size_t kMaxHashTableSize = 1 << kMaxHashTableBits; - - -} // end namespace duckdb_snappy - -#endif // THIRD_PARTY_SNAPPY_SNAPPY_INTERNAL_H_ - - -// LICENSE_CHANGE_END - - - -#if !defined(SNAPPY_HAVE_SSSE3) -// __SSSE3__ is defined by GCC and Clang. Visual Studio doesn't target SIMD -// support between SSE2 and AVX (so SSSE3 instructions require AVX support), and -// defines __AVX__ when AVX support is available. -#if defined(__SSSE3__) || defined(__AVX__) -#define SNAPPY_HAVE_SSSE3 1 -#else -#define SNAPPY_HAVE_SSSE3 0 -#endif -#endif // !defined(SNAPPY_HAVE_SSSE3) - -#if !defined(SNAPPY_HAVE_BMI2) -// __BMI2__ is defined by GCC and Clang. Visual Studio doesn't target BMI2 -// specifically, but it does define __AVX2__ when AVX2 support is available. -// Fortunately, AVX2 was introduced in Haswell, just like BMI2. -// -// BMI2 is not defined as a subset of AVX2 (unlike SSSE3 and AVX above). So, -// GCC and Clang can build code with AVX2 enabled but BMI2 disabled, in which -// case issuing BMI2 instructions results in a compiler error. -#if defined(__BMI2__) || (defined(_MSC_VER) && defined(__AVX2__)) -#define SNAPPY_HAVE_BMI2 1 -#else -#define SNAPPY_HAVE_BMI2 0 -#endif -#endif // !defined(SNAPPY_HAVE_BMI2) - -#if SNAPPY_HAVE_SSSE3 -// Please do not replace with . or with headers that assume more -// advanced SSE versions without checking with all the OWNERS. -#include -#endif - -#if SNAPPY_HAVE_BMI2 -// Please do not replace with . or with headers that assume more -// advanced SSE versions without checking with all the OWNERS. -#include -#endif - -#include - -#include -#include -#include - -namespace duckdb_snappy { - -using internal::COPY_1_BYTE_OFFSET; -using internal::COPY_2_BYTE_OFFSET; -using internal::LITERAL; -using internal::char_table; -using internal::kMaximumTagLength; - -// Any hash function will produce a valid compressed bitstream, but a good -// hash function reduces the number of collisions and thus yields better -// compression for compressible input, and more speed for incompressible -// input. Of course, it doesn't hurt if the hash function is reasonably fast -// either, as it gets called a lot. -static inline uint32 HashBytes(uint32 bytes, int shift) { - uint32 kMul = 0x1e35a7bd; - return (bytes * kMul) >> shift; -} -static inline uint32 Hash(const char* p, int shift) { - return HashBytes(UNALIGNED_LOAD32(p), shift); -} - -size_t MaxCompressedLength(size_t source_len) { - // Compressed data can be defined as: - // compressed := item* literal* - // item := literal* copy - // - // The trailing literal sequence has a space blowup of at most 62/60 - // since a literal of length 60 needs one tag byte + one extra byte - // for length information. - // - // Item blowup is trickier to measure. Suppose the "copy" op copies - // 4 bytes of data. Because of a special check in the encoding code, - // we produce a 4-byte copy only if the offset is < 65536. Therefore - // the copy op takes 3 bytes to encode, and this type of item leads - // to at most the 62/60 blowup for representing literals. - // - // Suppose the "copy" op copies 5 bytes of data. If the offset is big - // enough, it will take 5 bytes to encode the copy op. Therefore the - // worst case here is a one-byte literal followed by a five-byte copy. - // I.e., 6 bytes of input turn into 7 bytes of "compressed" data. - // - // This last factor dominates the blowup, so the final estimate is: - return 32 + source_len + source_len/6; -} - -namespace { - -void UnalignedCopy64(const void* src, void* dst) { - char tmp[8]; - memcpy(tmp, src, 8); - memcpy(dst, tmp, 8); -} - -void UnalignedCopy128(const void* src, void* dst) { - // memcpy gets vectorized when the appropriate compiler options are used. - // For example, x86 compilers targeting SSE2+ will optimize to an SSE2 load - // and store. - char tmp[16]; - memcpy(tmp, src, 16); - memcpy(dst, tmp, 16); -} - -// Copy [src, src+(op_limit-op)) to [op, (op_limit-op)) a byte at a time. Used -// for handling COPY operations where the input and output regions may overlap. -// For example, suppose: -// src == "ab" -// op == src + 2 -// op_limit == op + 20 -// After IncrementalCopySlow(src, op, op_limit), the result will have eleven -// copies of "ab" -// ababababababababababab -// Note that this does not match the semantics of either memcpy() or memmove(). -inline char* IncrementalCopySlow(const char* src, char* op, - char* const op_limit) { - // TODO: Remove pragma when LLVM is aware this - // function is only called in cold regions and when cold regions don't get - // vectorized or unrolled. -#ifdef __clang__ -#pragma clang loop unroll(disable) -#endif - while (op < op_limit) { - *op++ = *src++; - } - return op_limit; -} - -#if SNAPPY_HAVE_SSSE3 - -// This is a table of shuffle control masks that can be used as the source -// operand for PSHUFB to permute the contents of the destination XMM register -// into a repeating byte pattern. -alignas(16) const char pshufb_fill_patterns[7][16] = { - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - {0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1}, - {0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0}, - {0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3}, - {0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0}, - {0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3}, - {0, 1, 2, 3, 4, 5, 6, 0, 1, 2, 3, 4, 5, 6, 0, 1}, -}; - -#endif // SNAPPY_HAVE_SSSE3 - -// Copy [src, src+(op_limit-op)) to [op, (op_limit-op)) but faster than -// IncrementalCopySlow. buf_limit is the address past the end of the writable -// region of the buffer. -inline char* IncrementalCopy(const char* src, char* op, char* const op_limit, - char* const buf_limit) { - // Terminology: - // - // slop = buf_limit - op - // pat = op - src - // len = limit - op - assert(src < op); - assert(op <= op_limit); - assert(op_limit <= buf_limit); - // NOTE: The compressor always emits 4 <= len <= 64. It is ok to assume that - // to optimize this function but we have to also handle other cases in case - // the input does not satisfy these conditions. - - size_t pattern_size = op - src; - // The cases are split into different branches to allow the branch predictor, - // FDO, and static prediction hints to work better. For each input we list the - // ratio of invocations that match each condition. - // - // input slop < 16 pat < 8 len > 16 - // ------------------------------------------ - // html|html4|cp 0% 1.01% 27.73% - // urls 0% 0.88% 14.79% - // jpg 0% 64.29% 7.14% - // pdf 0% 2.56% 58.06% - // txt[1-4] 0% 0.23% 0.97% - // pb 0% 0.96% 13.88% - // bin 0.01% 22.27% 41.17% - // - // It is very rare that we don't have enough slop for doing block copies. It - // is also rare that we need to expand a pattern. Small patterns are common - // for incompressible formats and for those we are plenty fast already. - // Lengths are normally not greater than 16 but they vary depending on the - // input. In general if we always predict len <= 16 it would be an ok - // prediction. - // - // In order to be fast we want a pattern >= 8 bytes and an unrolled loop - // copying 2x 8 bytes at a time. - - // Handle the uncommon case where pattern is less than 8 bytes. - if (SNAPPY_PREDICT_FALSE(pattern_size < 8)) { -#if SNAPPY_HAVE_SSSE3 - // Load the first eight bytes into an 128-bit XMM register, then use PSHUFB - // to permute the register's contents in-place into a repeating sequence of - // the first "pattern_size" bytes. - // For example, suppose: - // src == "abc" - // op == op + 3 - // After _mm_shuffle_epi8(), "pattern" will have five copies of "abc" - // followed by one byte of slop: abcabcabcabcabca. - // - // The non-SSE fallback implementation suffers from store-forwarding stalls - // because its loads and stores partly overlap. By expanding the pattern - // in-place, we avoid the penalty. - if (SNAPPY_PREDICT_TRUE(op <= buf_limit - 16)) { - const __m128i shuffle_mask = _mm_load_si128( - reinterpret_cast(pshufb_fill_patterns) - + pattern_size - 1); - const __m128i pattern = _mm_shuffle_epi8( - _mm_loadl_epi64(reinterpret_cast(src)), shuffle_mask); - // Uninitialized bytes are masked out by the shuffle mask. - // TODO: remove annotation and macro defs once MSan is fixed. - SNAPPY_ANNOTATE_MEMORY_IS_INITIALIZED(&pattern, sizeof(pattern)); - pattern_size *= 16 / pattern_size; - char* op_end = std::min(op_limit, buf_limit - 15); - while (op < op_end) { - _mm_storeu_si128(reinterpret_cast<__m128i*>(op), pattern); - op += pattern_size; - } - if (SNAPPY_PREDICT_TRUE(op >= op_limit)) return op_limit; - } - return IncrementalCopySlow(src, op, op_limit); -#else // !SNAPPY_HAVE_SSSE3 - // If plenty of buffer space remains, expand the pattern to at least 8 - // bytes. The way the following loop is written, we need 8 bytes of buffer - // space if pattern_size >= 4, 11 bytes if pattern_size is 1 or 3, and 10 - // bytes if pattern_size is 2. Precisely encoding that is probably not - // worthwhile; instead, invoke the slow path if we cannot write 11 bytes - // (because 11 are required in the worst case). - if (SNAPPY_PREDICT_TRUE(op <= buf_limit - 11)) { - while (pattern_size < 8) { - UnalignedCopy64(src, op); - op += pattern_size; - pattern_size *= 2; - } - if (SNAPPY_PREDICT_TRUE(op >= op_limit)) return op_limit; - } else { - return IncrementalCopySlow(src, op, op_limit); - } -#endif // SNAPPY_HAVE_SSSE3 - } - assert(pattern_size >= 8); - - // Copy 2x 8 bytes at a time. Because op - src can be < 16, a single - // UnalignedCopy128 might overwrite data in op. UnalignedCopy64 is safe - // because expanding the pattern to at least 8 bytes guarantees that - // op - src >= 8. - // - // Typically, the op_limit is the gating factor so try to simplify the loop - // based on that. - if (SNAPPY_PREDICT_TRUE(op_limit <= buf_limit - 16)) { - // Factor the displacement from op to the source into a variable. This helps - // simplify the loop below by only varying the op pointer which we need to - // test for the end. Note that this was done after carefully examining the - // generated code to allow the addressing modes in the loop below to - // maximize micro-op fusion where possible on modern Intel processors. The - // generated code should be checked carefully for new processors or with - // major changes to the compiler. - // TODO: Simplify this code when the compiler reliably produces - // the correct x86 instruction sequence. - ptrdiff_t op_to_src = src - op; - - // The trip count of this loop is not large and so unrolling will only hurt - // code size without helping performance. - // - // TODO: Replace with loop trip count hint. -#ifdef __clang__ -#pragma clang loop unroll(disable) -#endif - do { - UnalignedCopy64(op + op_to_src, op); - UnalignedCopy64(op + op_to_src + 8, op + 8); - op += 16; - } while (op < op_limit); - return op_limit; - } - - // Fall back to doing as much as we can with the available slop in the - // buffer. This code path is relatively cold however so we save code size by - // avoiding unrolling and vectorizing. - // - // TODO: Remove pragma when when cold regions don't get vectorized - // or unrolled. -#ifdef __clang__ -#pragma clang loop unroll(disable) -#endif - for (char *op_end = buf_limit - 16; op < op_end; op += 16, src += 16) { - UnalignedCopy64(src, op); - UnalignedCopy64(src + 8, op + 8); - } - if (op >= op_limit) - return op_limit; - - // We only take this branch if we didn't have enough slop and we can do a - // single 8 byte copy. - if (SNAPPY_PREDICT_FALSE(op <= buf_limit - 8)) { - UnalignedCopy64(src, op); - src += 8; - op += 8; - } - return IncrementalCopySlow(src, op, op_limit); -} - -} // namespace - -template -static inline char* EmitLiteral(char* op, - const char* literal, - int len) { - // The vast majority of copies are below 16 bytes, for which a - // call to memcpy is overkill. This fast path can sometimes - // copy up to 15 bytes too much, but that is okay in the - // main loop, since we have a bit to go on for both sides: - // - // - The input will always have kInputMarginBytes = 15 extra - // available bytes, as long as we're in the main loop, and - // if not, allow_fast_path = false. - // - The output will always have 32 spare bytes (see - // MaxCompressedLength). - assert(len > 0); // Zero-length literals are disallowed - int n = len - 1; - if (allow_fast_path && len <= 16) { - // Fits in tag byte - *op++ = LITERAL | (n << 2); - - UnalignedCopy128(literal, op); - return op + len; - } - - if (n < 60) { - // Fits in tag byte - *op++ = LITERAL | (n << 2); - } else { - int count = (Bits::Log2Floor(n) >> 3) + 1; - assert(count >= 1); - assert(count <= 4); - *op++ = LITERAL | ((59 + count) << 2); - // Encode in upcoming bytes. - // Write 4 bytes, though we may care about only 1 of them. The output buffer - // is guaranteed to have at least 3 more spaces left as 'len >= 61' holds - // here and there is a memcpy of size 'len' below. - LittleEndian::Store32(op, n); - op += count; - } - memcpy(op, literal, len); - return op + len; -} - -template -static inline char* EmitCopyAtMost64(char* op, size_t offset, size_t len) { - assert(len <= 64); - assert(len >= 4); - assert(offset < 65536); - assert(len_less_than_12 == (len < 12)); - - if (len_less_than_12 && SNAPPY_PREDICT_TRUE(offset < 2048)) { - // offset fits in 11 bits. The 3 highest go in the top of the first byte, - // and the rest go in the second byte. - *op++ = COPY_1_BYTE_OFFSET + ((len - 4) << 2) + ((offset >> 3) & 0xe0); - *op++ = offset & 0xff; - } else { - // Write 4 bytes, though we only care about 3 of them. The output buffer - // is required to have some slack, so the extra byte won't overrun it. - uint32 u = COPY_2_BYTE_OFFSET + ((len - 1) << 2) + (offset << 8); - LittleEndian::Store32(op, u); - op += 3; - } - return op; -} - -template -static inline char* EmitCopy(char* op, size_t offset, size_t len) { - assert(len_less_than_12 == (len < 12)); - if (len_less_than_12) { - return EmitCopyAtMost64(op, offset, len); - } else { - // A special case for len <= 64 might help, but so far measurements suggest - // it's in the noise. - - // Emit 64 byte copies but make sure to keep at least four bytes reserved. - while (SNAPPY_PREDICT_FALSE(len >= 68)) { - op = EmitCopyAtMost64(op, offset, 64); - len -= 64; - } - - // One or two copies will now finish the job. - if (len > 64) { - op = EmitCopyAtMost64(op, offset, 60); - len -= 60; - } - - // Emit remainder. - if (len < 12) { - op = EmitCopyAtMost64(op, offset, len); - } else { - op = EmitCopyAtMost64(op, offset, len); - } - return op; - } -} - -bool GetUncompressedLength(const char* start, size_t n, size_t* result) { - uint32 v = 0; - const char* limit = start + n; - if (Varint::Parse32WithLimit(start, limit, &v) != NULL) { - *result = v; - return true; - } else { - return false; - } -} - -namespace { -uint32 CalculateTableSize(uint32 input_size) { - assert(kMaxHashTableSize >= 256); - if (input_size > kMaxHashTableSize) { - return kMaxHashTableSize; - } - if (input_size < 256) { - return 256; - } - // This is equivalent to Log2Ceiling(input_size), assuming input_size > 1. - // 2 << Log2Floor(x - 1) is equivalent to 1 << (1 + Log2Floor(x - 1)). - return 2u << Bits::Log2Floor(input_size - 1); -} -} // namespace - -namespace internal { -WorkingMemory::WorkingMemory(size_t input_size) { - const size_t max_fragment_size = std::min(input_size, kBlockSize); - const size_t table_size = CalculateTableSize(max_fragment_size); - size_ = table_size * sizeof(*table_) + max_fragment_size + - MaxCompressedLength(max_fragment_size); - mem_ = std::allocator().allocate(size_); - table_ = reinterpret_cast(mem_); - input_ = mem_ + table_size * sizeof(*table_); - output_ = input_ + max_fragment_size; -} - -WorkingMemory::~WorkingMemory() { - std::allocator().deallocate(mem_, size_); -} - -uint16* WorkingMemory::GetHashTable(size_t fragment_size, - int* table_size) const { - const size_t htsize = CalculateTableSize(fragment_size); - memset(table_, 0, htsize * sizeof(*table_)); - *table_size = htsize; - return table_; -} -} // end namespace internal - -// For 0 <= offset <= 4, GetUint32AtOffset(GetEightBytesAt(p), offset) will -// equal UNALIGNED_LOAD32(p + offset). Motivation: On x86-64 hardware we have -// empirically found that overlapping loads such as -// UNALIGNED_LOAD32(p) ... UNALIGNED_LOAD32(p+1) ... UNALIGNED_LOAD32(p+2) -// are slower than UNALIGNED_LOAD64(p) followed by shifts and casts to uint32. -// -// We have different versions for 64- and 32-bit; ideally we would avoid the -// two functions and just inline the UNALIGNED_LOAD64 call into -// GetUint32AtOffset, but GCC (at least not as of 4.6) is seemingly not clever -// enough to avoid loading the value multiple times then. For 64-bit, the load -// is done when GetEightBytesAt() is called, whereas for 32-bit, the load is -// done at GetUint32AtOffset() time. - -#ifdef ARCH_K8 - -typedef uint64 EightBytesReference; - -static inline EightBytesReference GetEightBytesAt(const char* ptr) { - return UNALIGNED_LOAD64(ptr); -} - -static inline uint32 GetUint32AtOffset(uint64 v, int offset) { - assert(offset >= 0); - assert(offset <= 4); - return v >> (LittleEndian::IsLittleEndian() ? 8 * offset : 32 - 8 * offset); -} - -#else - -typedef const char* EightBytesReference; - -static inline EightBytesReference GetEightBytesAt(const char* ptr) { - return ptr; -} - -static inline uint32 GetUint32AtOffset(const char* v, int offset) { - assert(offset >= 0); - assert(offset <= 4); - return UNALIGNED_LOAD32(v + offset); -} - -#endif - -// Flat array compression that does not emit the "uncompressed length" -// prefix. Compresses "input" string to the "*op" buffer. -// -// REQUIRES: "input" is at most "kBlockSize" bytes long. -// REQUIRES: "op" points to an array of memory that is at least -// "MaxCompressedLength(input.size())" in size. -// REQUIRES: All elements in "table[0..table_size-1]" are initialized to zero. -// REQUIRES: "table_size" is a power of two -// -// Returns an "end" pointer into "op" buffer. -// "end - op" is the compressed size of "input". -namespace internal { -char* CompressFragment(const char* input, - size_t input_size, - char* op, - uint16* table, - const int table_size) { - // "ip" is the input pointer, and "op" is the output pointer. - const char* ip = input; - assert(input_size <= kBlockSize); - assert((table_size & (table_size - 1)) == 0); // table must be power of two - const int shift = 32 - Bits::Log2Floor(table_size); - // assert(static_cast(kuint32max >> shift) == table_size - 1); - const char* ip_end = input + input_size; - const char* base_ip = ip; - // Bytes in [next_emit, ip) will be emitted as literal bytes. Or - // [next_emit, ip_end) after the main loop. - const char* next_emit = ip; - - const size_t kInputMarginBytes = 15; - if (SNAPPY_PREDICT_TRUE(input_size >= kInputMarginBytes)) { - const char* ip_limit = input + input_size - kInputMarginBytes; - - for (uint32 next_hash = Hash(++ip, shift); ; ) { - assert(next_emit < ip); - // The body of this loop calls EmitLiteral once and then EmitCopy one or - // more times. (The exception is that when we're close to exhausting - // the input we goto emit_remainder.) - // - // In the first iteration of this loop we're just starting, so - // there's nothing to copy, so calling EmitLiteral once is - // necessary. And we only start a new iteration when the - // current iteration has determined that a call to EmitLiteral will - // precede the next call to EmitCopy (if any). - // - // Step 1: Scan forward in the input looking for a 4-byte-long match. - // If we get close to exhausting the input then goto emit_remainder. - // - // Heuristic match skipping: If 32 bytes are scanned with no matches - // found, start looking only at every other byte. If 32 more bytes are - // scanned (or skipped), look at every third byte, etc.. When a match is - // found, immediately go back to looking at every byte. This is a small - // loss (~5% performance, ~0.1% density) for compressible data due to more - // bookkeeping, but for non-compressible data (such as JPEG) it's a huge - // win since the compressor quickly "realizes" the data is incompressible - // and doesn't bother looking for matches everywhere. - // - // The "skip" variable keeps track of how many bytes there are since the - // last match; dividing it by 32 (ie. right-shifting by five) gives the - // number of bytes to move ahead for each iteration. - uint32 skip = 32; - - const char* next_ip = ip; - const char* candidate; - do { - ip = next_ip; - uint32 hash = next_hash; - assert(hash == Hash(ip, shift)); - uint32 bytes_between_hash_lookups = skip >> 5; - skip += bytes_between_hash_lookups; - next_ip = ip + bytes_between_hash_lookups; - if (SNAPPY_PREDICT_FALSE(next_ip > ip_limit)) { - goto emit_remainder; - } - next_hash = Hash(next_ip, shift); - candidate = base_ip + table[hash]; - assert(candidate >= base_ip); - assert(candidate < ip); - - table[hash] = ip - base_ip; - } while (SNAPPY_PREDICT_TRUE(UNALIGNED_LOAD32(ip) != - UNALIGNED_LOAD32(candidate))); - - // Step 2: A 4-byte match has been found. We'll later see if more - // than 4 bytes match. But, prior to the match, input - // bytes [next_emit, ip) are unmatched. Emit them as "literal bytes." - assert(next_emit + 16 <= ip_end); - op = EmitLiteral(op, next_emit, ip - next_emit); - - // Step 3: Call EmitCopy, and then see if another EmitCopy could - // be our next move. Repeat until we find no match for the - // input immediately after what was consumed by the last EmitCopy call. - // - // If we exit this loop normally then we need to call EmitLiteral next, - // though we don't yet know how big the literal will be. We handle that - // by proceeding to the next iteration of the main loop. We also can exit - // this loop via goto if we get close to exhausting the input. - EightBytesReference input_bytes; - uint32 candidate_bytes = 0; - - do { - // We have a 4-byte match at ip, and no need to emit any - // "literal bytes" prior to ip. - const char* base = ip; - std::pair p = - FindMatchLength(candidate + 4, ip + 4, ip_end); - size_t matched = 4 + p.first; - ip += matched; - size_t offset = base - candidate; - assert(0 == memcmp(base, candidate, matched)); - if (p.second) { - op = EmitCopy(op, offset, matched); - } else { - op = EmitCopy(op, offset, matched); - } - next_emit = ip; - if (SNAPPY_PREDICT_FALSE(ip >= ip_limit)) { - goto emit_remainder; - } - // We are now looking for a 4-byte match again. We read - // table[Hash(ip, shift)] for that. To improve compression, - // we also update table[Hash(ip - 1, shift)] and table[Hash(ip, shift)]. - input_bytes = GetEightBytesAt(ip - 1); - uint32 prev_hash = HashBytes(GetUint32AtOffset(input_bytes, 0), shift); - table[prev_hash] = ip - base_ip - 1; - uint32 cur_hash = HashBytes(GetUint32AtOffset(input_bytes, 1), shift); - candidate = base_ip + table[cur_hash]; - candidate_bytes = UNALIGNED_LOAD32(candidate); - table[cur_hash] = ip - base_ip; - } while (GetUint32AtOffset(input_bytes, 1) == candidate_bytes); - - next_hash = HashBytes(GetUint32AtOffset(input_bytes, 2), shift); - ++ip; - } - } - - emit_remainder: - // Emit the remaining bytes as a literal - if (next_emit < ip_end) { - op = EmitLiteral(op, next_emit, - ip_end - next_emit); - } - - return op; -} -} // end namespace internal - -// Called back at avery compression call to trace parameters and sizes. -static inline void Report(const char *algorithm, size_t compressed_size, - size_t uncompressed_size) {} - -// Signature of output types needed by decompression code. -// The decompression code is templatized on a type that obeys this -// signature so that we do not pay virtual function call overhead in -// the middle of a tight decompression loop. -// -// class DecompressionWriter { -// public: -// // Called before decompression -// void SetExpectedLength(size_t length); -// -// // Called after decompression -// bool CheckLength() const; -// -// // Called repeatedly during decompression -// bool Append(const char* ip, size_t length); -// bool AppendFromSelf(uint32 offset, size_t length); -// -// // The rules for how TryFastAppend differs from Append are somewhat -// // convoluted: -// // -// // - TryFastAppend is allowed to decline (return false) at any -// // time, for any reason -- just "return false" would be -// // a perfectly legal implementation of TryFastAppend. -// // The intention is for TryFastAppend to allow a fast path -// // in the common case of a small append. -// // - TryFastAppend is allowed to read up to bytes -// // from the input buffer, whereas Append is allowed to read -// // . However, if it returns true, it must leave -// // at least five (kMaximumTagLength) bytes in the input buffer -// // afterwards, so that there is always enough space to read the -// // next tag without checking for a refill. -// // - TryFastAppend must always return decline (return false) -// // if is 61 or more, as in this case the literal length is not -// // decoded fully. In practice, this should not be a big problem, -// // as it is unlikely that one would implement a fast path accepting -// // this much data. -// // -// bool TryFastAppend(const char* ip, size_t available, size_t length); -// }; - -static inline uint32 ExtractLowBytes(uint32 v, int n) { - assert(n >= 0); - assert(n <= 4); -#if SNAPPY_HAVE_BMI2 - return _bzhi_u32(v, 8 * n); -#else - // This needs to be wider than uint32 otherwise `mask << 32` will be - // undefined. - uint64 mask = 0xffffffff; - return v & ~(mask << (8 * n)); -#endif -} - -static inline bool LeftShiftOverflows(uint8 value, uint32 shift) { - assert(shift < 32); - static const uint8 masks[] = { - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // - 0x00, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe}; - return (value & masks[shift]) != 0; -} - -// Helper class for decompression -class SnappyDecompressor { - private: - Source* reader_; // Underlying source of bytes to decompress - const char* ip_; // Points to next buffered byte - const char* ip_limit_; // Points just past buffered bytes - uint32 peeked_; // Bytes peeked from reader (need to skip) - bool eof_; // Hit end of input without an error? - char scratch_[kMaximumTagLength]; // See RefillTag(). - - // Ensure that all of the tag metadata for the next tag is available - // in [ip_..ip_limit_-1]. Also ensures that [ip,ip+4] is readable even - // if (ip_limit_ - ip_ < 5). - // - // Returns true on success, false on error or end of input. - bool RefillTag(); - - public: - explicit SnappyDecompressor(Source* reader) - : reader_(reader), - ip_(NULL), - ip_limit_(NULL), - peeked_(0), - eof_(false) { - } - - ~SnappyDecompressor() { - // Advance past any bytes we peeked at from the reader - reader_->Skip(peeked_); - } - - // Returns true iff we have hit the end of the input without an error. - bool eof() const { - return eof_; - } - - // Read the uncompressed length stored at the start of the compressed data. - // On success, stores the length in *result and returns true. - // On failure, returns false. - bool ReadUncompressedLength(uint32* result) { - assert(ip_ == NULL); // Must not have read anything yet - // Length is encoded in 1..5 bytes - *result = 0; - uint32 shift = 0; - while (true) { - if (shift >= 32) return false; - size_t n; - const char* ip = reader_->Peek(&n); - if (n == 0) return false; - const unsigned char c = *(reinterpret_cast(ip)); - reader_->Skip(1); - uint32 val = c & 0x7f; - if (LeftShiftOverflows(static_cast(val), shift)) return false; - *result |= val << shift; - if (c < 128) { - break; - } - shift += 7; - } - return true; - } - - // Process the next item found in the input. - // Returns true if successful, false on error or end of input. - template -#if defined(__GNUC__) && defined(__x86_64__) - __attribute__((aligned(32))) -#endif - void DecompressAllTags(Writer* writer) { - // In x86, pad the function body to start 16 bytes later. This function has - // a couple of hotspots that are highly sensitive to alignment: we have - // observed regressions by more than 20% in some metrics just by moving the - // exact same code to a different position in the benchmark binary. - // - // Putting this code on a 32-byte-aligned boundary + 16 bytes makes us hit - // the "lucky" case consistently. Unfortunately, this is a very brittle - // workaround, and future differences in code generation may reintroduce - // this regression. If you experience a big, difficult to explain, benchmark - // performance regression here, first try removing this hack. -#if defined(__GNUC__) && defined(__x86_64__) - // Two 8-byte "NOP DWORD ptr [EAX + EAX*1 + 00000000H]" instructions. - asm(".byte 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00"); - asm(".byte 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00"); -#endif - - const char* ip = ip_; - // We could have put this refill fragment only at the beginning of the loop. - // However, duplicating it at the end of each branch gives the compiler more - // scope to optimize the expression based on the local - // context, which overall increases speed. - #define MAYBE_REFILL() \ - if (ip_limit_ - ip < kMaximumTagLength) { \ - ip_ = ip; \ - if (!RefillTag()) return; \ - ip = ip_; \ - } - - MAYBE_REFILL(); - for ( ;; ) { - const unsigned char c = *(reinterpret_cast(ip++)); - - // Ratio of iterations that have LITERAL vs non-LITERAL for different - // inputs. - // - // input LITERAL NON_LITERAL - // ----------------------------------- - // html|html4|cp 23% 77% - // urls 36% 64% - // jpg 47% 53% - // pdf 19% 81% - // txt[1-4] 25% 75% - // pb 24% 76% - // bin 24% 76% - if (SNAPPY_PREDICT_FALSE((c & 0x3) == LITERAL)) { - size_t literal_length = (c >> 2) + 1u; - if (writer->TryFastAppend(ip, ip_limit_ - ip, literal_length)) { - assert(literal_length < 61); - ip += literal_length; - // NOTE: There is no MAYBE_REFILL() here, as TryFastAppend() - // will not return true unless there's already at least five spare - // bytes in addition to the literal. - continue; - } - if (SNAPPY_PREDICT_FALSE(literal_length >= 61)) { - // Long literal. - const size_t literal_length_length = literal_length - 60; - literal_length = - ExtractLowBytes(LittleEndian::Load32(ip), literal_length_length) + - 1; - ip += literal_length_length; - } - - size_t avail = ip_limit_ - ip; - while (avail < literal_length) { - if (!writer->Append(ip, avail)) return; - literal_length -= avail; - reader_->Skip(peeked_); - size_t n; - ip = reader_->Peek(&n); - avail = n; - peeked_ = avail; - if (avail == 0) return; // Premature end of input - ip_limit_ = ip + avail; - } - if (!writer->Append(ip, literal_length)) { - return; - } - ip += literal_length; - MAYBE_REFILL(); - } else { - const size_t entry = char_table[c]; - const size_t trailer = - ExtractLowBytes(LittleEndian::Load32(ip), entry >> 11); - const size_t length = entry & 0xff; - ip += entry >> 11; - - // copy_offset/256 is encoded in bits 8..10. By just fetching - // those bits, we get copy_offset (since the bit-field starts at - // bit 8). - const size_t copy_offset = entry & 0x700; - if (!writer->AppendFromSelf(copy_offset + trailer, length)) { - return; - } - MAYBE_REFILL(); - } - } - -#undef MAYBE_REFILL - } -}; - -bool SnappyDecompressor::RefillTag() { - const char* ip = ip_; - if (ip == ip_limit_) { - // Fetch a new fragment from the reader - reader_->Skip(peeked_); // All peeked bytes are used up - size_t n; - ip = reader_->Peek(&n); - peeked_ = n; - eof_ = (n == 0); - if (eof_) return false; - ip_limit_ = ip + n; - } - - // Read the tag character - assert(ip < ip_limit_); - const unsigned char c = *(reinterpret_cast(ip)); - const uint32 entry = char_table[c]; - const uint32 needed = (entry >> 11) + 1; // +1 byte for 'c' - assert(needed <= sizeof(scratch_)); - - // Read more bytes from reader if needed - uint32 nbuf = ip_limit_ - ip; - if (nbuf < needed) { - // Stitch together bytes from ip and reader to form the word - // contents. We store the needed bytes in "scratch_". They - // will be consumed immediately by the caller since we do not - // read more than we need. - memmove(scratch_, ip, nbuf); - reader_->Skip(peeked_); // All peeked bytes are used up - peeked_ = 0; - while (nbuf < needed) { - size_t length; - const char* src = reader_->Peek(&length); - if (length == 0) return false; - uint32 to_add = std::min(needed - nbuf, length); - memcpy(scratch_ + nbuf, src, to_add); - nbuf += to_add; - reader_->Skip(to_add); - } - assert(nbuf == needed); - ip_ = scratch_; - ip_limit_ = scratch_ + needed; - } else if (nbuf < kMaximumTagLength) { - // Have enough bytes, but move into scratch_ so that we do not - // read past end of input - memmove(scratch_, ip, nbuf); - reader_->Skip(peeked_); // All peeked bytes are used up - peeked_ = 0; - ip_ = scratch_; - ip_limit_ = scratch_ + nbuf; - } else { - // Pass pointer to buffer returned by reader_. - ip_ = ip; - } - return true; -} - -template -static bool InternalUncompress(Source* r, Writer* writer) { - // Read the uncompressed length from the front of the compressed input - SnappyDecompressor decompressor(r); - uint32 uncompressed_len = 0; - if (!decompressor.ReadUncompressedLength(&uncompressed_len)) return false; - - return InternalUncompressAllTags(&decompressor, writer, r->Available(), - uncompressed_len); -} - -template -static bool InternalUncompressAllTags(SnappyDecompressor* decompressor, - Writer* writer, - uint32 compressed_len, - uint32 uncompressed_len) { - Report("snappy_uncompress", compressed_len, uncompressed_len); - - writer->SetExpectedLength(uncompressed_len); - - // Process the entire input - decompressor->DecompressAllTags(writer); - writer->Flush(); - return (decompressor->eof() && writer->CheckLength()); -} - -bool GetUncompressedLength(Source* source, uint32* result) { - SnappyDecompressor decompressor(source); - return decompressor.ReadUncompressedLength(result); -} - -size_t Compress(Source* reader, Sink* writer) { - size_t written = 0; - size_t N = reader->Available(); - const size_t uncompressed_size = N; - char ulength[Varint::kMax32]; - char* p = Varint::Encode32(ulength, N); - writer->Append(ulength, p-ulength); - written += (p - ulength); - - internal::WorkingMemory wmem(N); - - while (N > 0) { - // Get next block to compress (without copying if possible) - size_t fragment_size; - const char* fragment = reader->Peek(&fragment_size); - assert(fragment_size != 0); // premature end of input - const size_t num_to_read = std::min(N, kBlockSize); - size_t bytes_read = fragment_size; - - size_t pending_advance = 0; - if (bytes_read >= num_to_read) { - // Buffer returned by reader is large enough - pending_advance = num_to_read; - fragment_size = num_to_read; - } else { - char* scratch = wmem.GetScratchInput(); - memcpy(scratch, fragment, bytes_read); - reader->Skip(bytes_read); - - while (bytes_read < num_to_read) { - fragment = reader->Peek(&fragment_size); - size_t n = std::min(fragment_size, num_to_read - bytes_read); - memcpy(scratch + bytes_read, fragment, n); - bytes_read += n; - reader->Skip(n); - } - assert(bytes_read == num_to_read); - fragment = scratch; - fragment_size = num_to_read; - } - assert(fragment_size == num_to_read); - - // Get encoding table for compression - int table_size; - uint16* table = wmem.GetHashTable(num_to_read, &table_size); - - // Compress input_fragment and append to dest - const int max_output = MaxCompressedLength(num_to_read); - - // Need a scratch buffer for the output, in case the byte sink doesn't - // have room for us directly. - - // Since we encode kBlockSize regions followed by a region - // which is <= kBlockSize in length, a previously allocated - // scratch_output[] region is big enough for this iteration. - char* dest = writer->GetAppendBuffer(max_output, wmem.GetScratchOutput()); - char* end = internal::CompressFragment(fragment, fragment_size, dest, table, - table_size); - writer->Append(dest, end - dest); - written += (end - dest); - - N -= num_to_read; - reader->Skip(pending_advance); - } - - Report("snappy_compress", written, uncompressed_size); - - return written; -} - -// ----------------------------------------------------------------------- -// IOVec interfaces -// ----------------------------------------------------------------------- - -// A type that writes to an iovec. -// Note that this is not a "ByteSink", but a type that matches the -// Writer template argument to SnappyDecompressor::DecompressAllTags(). -class SnappyIOVecWriter { - private: - // output_iov_end_ is set to iov + count and used to determine when - // the end of the iovs is reached. - const struct iovec* output_iov_end_; - -#if !defined(NDEBUG) - const struct iovec* output_iov_; -#endif // !defined(NDEBUG) - - // Current iov that is being written into. - const struct iovec* curr_iov_; - - // Pointer to current iov's write location. - char* curr_iov_output_; - - // Remaining bytes to write into curr_iov_output. - size_t curr_iov_remaining_; - - // Total bytes decompressed into output_iov_ so far. - size_t total_written_; - - // Maximum number of bytes that will be decompressed into output_iov_. - size_t output_limit_; - - static inline char* GetIOVecPointer(const struct iovec* iov, size_t offset) { - return reinterpret_cast(iov->iov_base) + offset; - } - - public: - // Does not take ownership of iov. iov must be valid during the - // entire lifetime of the SnappyIOVecWriter. - inline SnappyIOVecWriter(const struct iovec* iov, size_t iov_count) - : output_iov_end_(iov + iov_count), -#if !defined(NDEBUG) - output_iov_(iov), -#endif // !defined(NDEBUG) - curr_iov_(iov), - curr_iov_output_(iov_count ? reinterpret_cast(iov->iov_base) - : nullptr), - curr_iov_remaining_(iov_count ? iov->iov_len : 0), - total_written_(0), - output_limit_(-1) {} - - inline void SetExpectedLength(size_t len) { - output_limit_ = len; - } - - inline bool CheckLength() const { - return total_written_ == output_limit_; - } - - inline bool Append(const char* ip, size_t len) { - if (total_written_ + len > output_limit_) { - return false; - } - - return AppendNoCheck(ip, len); - } - - inline bool AppendNoCheck(const char* ip, size_t len) { - while (len > 0) { - if (curr_iov_remaining_ == 0) { - // This iovec is full. Go to the next one. - if (curr_iov_ + 1 >= output_iov_end_) { - return false; - } - ++curr_iov_; - curr_iov_output_ = reinterpret_cast(curr_iov_->iov_base); - curr_iov_remaining_ = curr_iov_->iov_len; - } - - const size_t to_write = std::min(len, curr_iov_remaining_); - memcpy(curr_iov_output_, ip, to_write); - curr_iov_output_ += to_write; - curr_iov_remaining_ -= to_write; - total_written_ += to_write; - ip += to_write; - len -= to_write; - } - - return true; - } - - inline bool TryFastAppend(const char* ip, size_t available, size_t len) { - const size_t space_left = output_limit_ - total_written_; - if (len <= 16 && available >= 16 + kMaximumTagLength && space_left >= 16 && - curr_iov_remaining_ >= 16) { - // Fast path, used for the majority (about 95%) of invocations. - UnalignedCopy128(ip, curr_iov_output_); - curr_iov_output_ += len; - curr_iov_remaining_ -= len; - total_written_ += len; - return true; - } - - return false; - } - - inline bool AppendFromSelf(size_t offset, size_t len) { - // See SnappyArrayWriter::AppendFromSelf for an explanation of - // the "offset - 1u" trick. - if (offset - 1u >= total_written_) { - return false; - } - const size_t space_left = output_limit_ - total_written_; - if (len > space_left) { - return false; - } - - // Locate the iovec from which we need to start the copy. - const iovec* from_iov = curr_iov_; - size_t from_iov_offset = curr_iov_->iov_len - curr_iov_remaining_; - while (offset > 0) { - if (from_iov_offset >= offset) { - from_iov_offset -= offset; - break; - } - - offset -= from_iov_offset; - --from_iov; -#if !defined(NDEBUG) - assert(from_iov >= output_iov_); -#endif // !defined(NDEBUG) - from_iov_offset = from_iov->iov_len; - } - - // Copy bytes starting from the iovec pointed to by from_iov_index to - // the current iovec. - while (len > 0) { - assert(from_iov <= curr_iov_); - if (from_iov != curr_iov_) { - const size_t to_copy = - std::min((unsigned long)(from_iov->iov_len - from_iov_offset), (unsigned long)len); - AppendNoCheck(GetIOVecPointer(from_iov, from_iov_offset), to_copy); - len -= to_copy; - if (len > 0) { - ++from_iov; - from_iov_offset = 0; - } - } else { - size_t to_copy = curr_iov_remaining_; - if (to_copy == 0) { - // This iovec is full. Go to the next one. - if (curr_iov_ + 1 >= output_iov_end_) { - return false; - } - ++curr_iov_; - curr_iov_output_ = reinterpret_cast(curr_iov_->iov_base); - curr_iov_remaining_ = curr_iov_->iov_len; - continue; - } - if (to_copy > len) { - to_copy = len; - } - - IncrementalCopy(GetIOVecPointer(from_iov, from_iov_offset), - curr_iov_output_, curr_iov_output_ + to_copy, - curr_iov_output_ + curr_iov_remaining_); - curr_iov_output_ += to_copy; - curr_iov_remaining_ -= to_copy; - from_iov_offset += to_copy; - total_written_ += to_copy; - len -= to_copy; - } - } - - return true; - } - - inline void Flush() {} -}; - -bool RawUncompressToIOVec(const char* compressed, size_t compressed_length, - const struct iovec* iov, size_t iov_cnt) { - ByteArraySource reader(compressed, compressed_length); - return RawUncompressToIOVec(&reader, iov, iov_cnt); -} - -bool RawUncompressToIOVec(Source* compressed, const struct iovec* iov, - size_t iov_cnt) { - SnappyIOVecWriter output(iov, iov_cnt); - return InternalUncompress(compressed, &output); -} - -// ----------------------------------------------------------------------- -// Flat array interfaces -// ----------------------------------------------------------------------- - -// A type that writes to a flat array. -// Note that this is not a "ByteSink", but a type that matches the -// Writer template argument to SnappyDecompressor::DecompressAllTags(). -class SnappyArrayWriter { - private: - char* base_; - char* op_; - char* op_limit_; - - public: - inline explicit SnappyArrayWriter(char* dst) - : base_(dst), - op_(dst), - op_limit_(dst) { - } - - inline void SetExpectedLength(size_t len) { - op_limit_ = op_ + len; - } - - inline bool CheckLength() const { - return op_ == op_limit_; - } - - inline bool Append(const char* ip, size_t len) { - char* op = op_; - const size_t space_left = op_limit_ - op; - if (space_left < len) { - return false; - } - memcpy(op, ip, len); - op_ = op + len; - return true; - } - - inline bool TryFastAppend(const char* ip, size_t available, size_t len) { - char* op = op_; - const size_t space_left = op_limit_ - op; - if (len <= 16 && available >= 16 + kMaximumTagLength && space_left >= 16) { - // Fast path, used for the majority (about 95%) of invocations. - UnalignedCopy128(ip, op); - op_ = op + len; - return true; - } else { - return false; - } - } - - inline bool AppendFromSelf(size_t offset, size_t len) { - char* const op_end = op_ + len; - - // Check if we try to append from before the start of the buffer. - // Normally this would just be a check for "produced < offset", - // but "produced <= offset - 1u" is equivalent for every case - // except the one where offset==0, where the right side will wrap around - // to a very big number. This is convenient, as offset==0 is another - // invalid case that we also want to catch, so that we do not go - // into an infinite loop. - if (Produced() <= offset - 1u || op_end > op_limit_) return false; - op_ = IncrementalCopy(op_ - offset, op_, op_end, op_limit_); - - return true; - } - inline size_t Produced() const { - assert(op_ >= base_); - return op_ - base_; - } - inline void Flush() {} -}; - -bool RawUncompress(const char* compressed, size_t n, char* uncompressed) { - ByteArraySource reader(compressed, n); - return RawUncompress(&reader, uncompressed); -} - -bool RawUncompress(Source* compressed, char* uncompressed) { - SnappyArrayWriter output(uncompressed); - return InternalUncompress(compressed, &output); -} - -bool Uncompress(const char* compressed, size_t n, string* uncompressed) { - size_t ulength; - if (!GetUncompressedLength(compressed, n, &ulength)) { - return false; - } - // On 32-bit builds: max_size() < kuint32max. Check for that instead - // of crashing (e.g., consider externally specified compressed data). - if (ulength > uncompressed->max_size()) { - return false; - } - STLStringResizeUninitialized(uncompressed, ulength); - return RawUncompress(compressed, n, string_as_array(uncompressed)); -} - -// A Writer that drops everything on the floor and just does validation -class SnappyDecompressionValidator { - private: - size_t expected_; - size_t produced_; - - public: - inline SnappyDecompressionValidator() : expected_(0), produced_(0) { } - inline void SetExpectedLength(size_t len) { - expected_ = len; - } - inline bool CheckLength() const { - return expected_ == produced_; - } - inline bool Append(const char* ip, size_t len) { - produced_ += len; - return produced_ <= expected_; - } - inline bool TryFastAppend(const char* ip, size_t available, size_t length) { - return false; - } - inline bool AppendFromSelf(size_t offset, size_t len) { - // See SnappyArrayWriter::AppendFromSelf for an explanation of - // the "offset - 1u" trick. - if (produced_ <= offset - 1u) return false; - produced_ += len; - return produced_ <= expected_; - } - inline void Flush() {} -}; - -bool IsValidCompressedBuffer(const char* compressed, size_t n) { - ByteArraySource reader(compressed, n); - SnappyDecompressionValidator writer; - return InternalUncompress(&reader, &writer); -} - -bool IsValidCompressed(Source* compressed) { - SnappyDecompressionValidator writer; - return InternalUncompress(compressed, &writer); -} - -void RawCompress(const char* input, - size_t input_length, - char* compressed, - size_t* compressed_length) { - ByteArraySource reader(input, input_length); - UncheckedByteArraySink writer(compressed); - Compress(&reader, &writer); - - // Compute how many bytes were added - *compressed_length = (writer.CurrentDestination() - compressed); -} - -size_t Compress(const char* input, size_t input_length, string* compressed) { - // Pre-grow the buffer to the max length of the compressed output - STLStringResizeUninitialized(compressed, MaxCompressedLength(input_length)); - - size_t compressed_length; - RawCompress(input, input_length, string_as_array(compressed), - &compressed_length); - compressed->resize(compressed_length); - return compressed_length; -} - -// ----------------------------------------------------------------------- -// Sink interface -// ----------------------------------------------------------------------- - -// A type that decompresses into a Sink. The template parameter -// Allocator must export one method "char* Allocate(int size);", which -// allocates a buffer of "size" and appends that to the destination. -template -class SnappyScatteredWriter { - Allocator allocator_; - - // We need random access into the data generated so far. Therefore - // we keep track of all of the generated data as an array of blocks. - // All of the blocks except the last have length kBlockSize. - std::vector blocks_; - size_t expected_; - - // Total size of all fully generated blocks so far - size_t full_size_; - - // Pointer into current output block - char* op_base_; // Base of output block - char* op_ptr_; // Pointer to next unfilled byte in block - char* op_limit_; // Pointer just past block - - inline size_t Size() const { - return full_size_ + (op_ptr_ - op_base_); - } - - bool SlowAppend(const char* ip, size_t len); - bool SlowAppendFromSelf(size_t offset, size_t len); - - public: - inline explicit SnappyScatteredWriter(const Allocator& allocator) - : allocator_(allocator), - full_size_(0), - op_base_(NULL), - op_ptr_(NULL), - op_limit_(NULL) { - } - - inline void SetExpectedLength(size_t len) { - assert(blocks_.empty()); - expected_ = len; - } - - inline bool CheckLength() const { - return Size() == expected_; - } - - // Return the number of bytes actually uncompressed so far - inline size_t Produced() const { - return Size(); - } - - inline bool Append(const char* ip, size_t len) { - size_t avail = op_limit_ - op_ptr_; - if (len <= avail) { - // Fast path - memcpy(op_ptr_, ip, len); - op_ptr_ += len; - return true; - } else { - return SlowAppend(ip, len); - } - } - - inline bool TryFastAppend(const char* ip, size_t available, size_t length) { - char* op = op_ptr_; - const int space_left = op_limit_ - op; - if (length <= 16 && available >= 16 + kMaximumTagLength && - space_left >= 16) { - // Fast path, used for the majority (about 95%) of invocations. - UnalignedCopy128(ip, op); - op_ptr_ = op + length; - return true; - } else { - return false; - } - } - - inline bool AppendFromSelf(size_t offset, size_t len) { - char* const op_end = op_ptr_ + len; - // See SnappyArrayWriter::AppendFromSelf for an explanation of - // the "offset - 1u" trick. - if (SNAPPY_PREDICT_TRUE(offset - 1u < (size_t)(op_ptr_ - op_base_) && - op_end <= op_limit_)) { - // Fast path: src and dst in current block. - op_ptr_ = IncrementalCopy(op_ptr_ - offset, op_ptr_, op_end, op_limit_); - return true; - } - return SlowAppendFromSelf(offset, len); - } - - // Called at the end of the decompress. We ask the allocator - // write all blocks to the sink. - inline void Flush() { allocator_.Flush(Produced()); } -}; - -template -bool SnappyScatteredWriter::SlowAppend(const char* ip, size_t len) { - size_t avail = op_limit_ - op_ptr_; - while (len > avail) { - // Completely fill this block - memcpy(op_ptr_, ip, avail); - op_ptr_ += avail; - assert(op_limit_ - op_ptr_ == 0); - full_size_ += (op_ptr_ - op_base_); - len -= avail; - ip += avail; - - // Bounds check - if (full_size_ + len > expected_) { - return false; - } - - // Make new block - size_t bsize = std::min(kBlockSize, expected_ - full_size_); - op_base_ = allocator_.Allocate(bsize); - op_ptr_ = op_base_; - op_limit_ = op_base_ + bsize; - blocks_.push_back(op_base_); - avail = bsize; - } - - memcpy(op_ptr_, ip, len); - op_ptr_ += len; - return true; -} - -template -bool SnappyScatteredWriter::SlowAppendFromSelf(size_t offset, - size_t len) { - // Overflow check - // See SnappyArrayWriter::AppendFromSelf for an explanation of - // the "offset - 1u" trick. - const size_t cur = Size(); - if (offset - 1u >= cur) return false; - if (expected_ - cur < len) return false; - - // Currently we shouldn't ever hit this path because Compress() chops the - // input into blocks and does not create cross-block copies. However, it is - // nice if we do not rely on that, since we can get better compression if we - // allow cross-block copies and thus might want to change the compressor in - // the future. - size_t src = cur - offset; - while (len-- > 0) { - char c = blocks_[src >> kBlockLog][src & (kBlockSize-1)]; - Append(&c, 1); - src++; - } - return true; -} - -class SnappySinkAllocator { - public: - explicit SnappySinkAllocator(Sink* dest): dest_(dest) {} - ~SnappySinkAllocator() {} - - char* Allocate(int size) { - Datablock block(new char[size], size); - blocks_.push_back(block); - return block.data; - } - - // We flush only at the end, because the writer wants - // random access to the blocks and once we hand the - // block over to the sink, we can't access it anymore. - // Also we don't write more than has been actually written - // to the blocks. - void Flush(size_t size) { - size_t size_written = 0; - size_t block_size; - for (size_t i = 0; i < blocks_.size(); ++i) { - block_size = std::min(blocks_[i].size, size - size_written); - dest_->AppendAndTakeOwnership(blocks_[i].data, block_size, - &SnappySinkAllocator::Deleter, NULL); - size_written += block_size; - } - blocks_.clear(); - } - - private: - struct Datablock { - char* data; - size_t size; - Datablock(char* p, size_t s) : data(p), size(s) {} - }; - - static void Deleter(void* arg, const char* bytes, size_t size) { - delete[] bytes; - } - - Sink* dest_; - std::vector blocks_; - - // Note: copying this object is allowed -}; - -size_t UncompressAsMuchAsPossible(Source* compressed, Sink* uncompressed) { - SnappySinkAllocator allocator(uncompressed); - SnappyScatteredWriter writer(allocator); - InternalUncompress(compressed, &writer); - return writer.Produced(); -} - -bool Uncompress(Source* compressed, Sink* uncompressed) { - // Read the uncompressed length from the front of the compressed input - SnappyDecompressor decompressor(compressed); - uint32 uncompressed_len = 0; - if (!decompressor.ReadUncompressedLength(&uncompressed_len)) { - return false; - } - - char c; - size_t allocated_size; - char* buf = uncompressed->GetAppendBufferVariable( - 1, uncompressed_len, &c, 1, &allocated_size); - - const size_t compressed_len = compressed->Available(); - // If we can get a flat buffer, then use it, otherwise do block by block - // uncompression - if (allocated_size >= uncompressed_len) { - SnappyArrayWriter writer(buf); - bool result = InternalUncompressAllTags(&decompressor, &writer, - compressed_len, uncompressed_len); - uncompressed->Append(buf, writer.Produced()); - return result; - } else { - SnappySinkAllocator allocator(uncompressed); - SnappyScatteredWriter writer(allocator); - return InternalUncompressAllTags(&decompressor, &writer, compressed_len, - uncompressed_len); - } -} - -} // namespace duckdb_snappy - - -// LICENSE_CHANGE_END - - -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #4 -// See the end of this file for a list - -/* ****************************************************************** - * FSE : Finite State Entropy encoder - * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc. - * - * You can contact the author at : - * - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy - * - Public forum : https://groups.google.com/forum/#!forum/lz4c - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. -****************************************************************** */ - -/* ************************************************************** -* Includes -****************************************************************/ -#include /* malloc, free, qsort */ -#include /* memcpy, memset */ - - -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #4 -// See the end of this file for a list - -/* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. - */ - -#ifndef ZSTD_COMPILER_H -#define ZSTD_COMPILER_H - -/*-******************************************************* -* Compiler specifics -*********************************************************/ -/* force inlining */ - -#if !defined(ZSTD_NO_INLINE) -#if (defined(__GNUC__) && !defined(__STRICT_ANSI__)) || defined(__cplusplus) || defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */ -# define INLINE_KEYWORD inline -#else -# define INLINE_KEYWORD -#endif - -#if defined(__GNUC__) || defined(__ICCARM__) -# define FORCE_INLINE_ATTR __attribute__((always_inline)) -#elif defined(_MSC_VER) -# define FORCE_INLINE_ATTR __forceinline -#else -# define FORCE_INLINE_ATTR -#endif - -#else - -#define INLINE_KEYWORD -#define FORCE_INLINE_ATTR - -#endif - -/** - * FORCE_INLINE_TEMPLATE is used to define C "templates", which take constant - * parameters. They must be inlined for the compiler to eliminate the constant - * branches. - */ -#define FORCE_INLINE_TEMPLATE static INLINE_KEYWORD FORCE_INLINE_ATTR -/** - * HINT_INLINE is used to help the compiler generate better code. It is *not* - * used for "templates", so it can be tweaked based on the compilers - * performance. - * - * gcc-4.8 and gcc-4.9 have been shown to benefit from leaving off the - * always_inline attribute. - * - * clang up to 5.0.0 (trunk) benefit tremendously from the always_inline - * attribute. - */ -#if !defined(__clang__) && defined(__GNUC__) && __GNUC__ >= 4 && __GNUC_MINOR__ >= 8 && __GNUC__ < 5 -# define HINT_INLINE static INLINE_KEYWORD -#else -# define HINT_INLINE static INLINE_KEYWORD FORCE_INLINE_ATTR -#endif - -/* UNUSED_ATTR tells the compiler it is okay if the function is unused. */ -#if defined(__GNUC__) -# define UNUSED_ATTR __attribute__((unused)) -#else -# define UNUSED_ATTR -#endif - -/* force no inlining */ -#ifdef _MSC_VER -# define FORCE_NOINLINE static __declspec(noinline) -#else -# if defined(__GNUC__) || defined(__ICCARM__) -# define FORCE_NOINLINE static __attribute__((__noinline__)) -# else -# define FORCE_NOINLINE static -# endif -#endif - -/* target attribute */ -#ifndef __has_attribute - #define __has_attribute(x) 0 /* Compatibility with non-clang compilers. */ -#endif -#if defined(__GNUC__) || defined(__ICCARM__) -# define TARGET_ATTRIBUTE(target) __attribute__((__target__(target))) -#else -# define TARGET_ATTRIBUTE(target) -#endif - -/* Enable runtime BMI2 dispatch based on the CPU. - * Enabled for clang & gcc >=4.8 on x86 when BMI2 isn't enabled by default. - */ -#ifndef DYNAMIC_BMI2 - #if ((defined(__clang__) && __has_attribute(__target__)) \ - || (defined(__GNUC__) \ - && (__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)))) \ - && (defined(__x86_64__) || defined(_M_X86)) \ - && !defined(__BMI2__) - # define DYNAMIC_BMI2 1 - #else - # define DYNAMIC_BMI2 0 - #endif -#endif - -/* prefetch - * can be disabled, by declaring NO_PREFETCH build macro */ -#if defined(NO_PREFETCH) -# define PREFETCH_L1(ptr) (void)(ptr) /* disabled */ -# define PREFETCH_L2(ptr) (void)(ptr) /* disabled */ -#else -# if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86)) /* _mm_prefetch() is not defined outside of x86/x64 */ -# include /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */ -# define PREFETCH_L1(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T0) -# define PREFETCH_L2(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T1) -# elif defined(__aarch64__) -# define PREFETCH_L1(ptr) __asm__ __volatile__("prfm pldl1keep, %0" ::"Q"(*(ptr))) -# define PREFETCH_L2(ptr) __asm__ __volatile__("prfm pldl2keep, %0" ::"Q"(*(ptr))) -# elif defined(__GNUC__) && ( (__GNUC__ >= 4) || ( (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) ) ) -# define PREFETCH_L1(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 3 /* locality */) -# define PREFETCH_L2(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 2 /* locality */) -# else -# define PREFETCH_L1(ptr) (void)(ptr) /* disabled */ -# define PREFETCH_L2(ptr) (void)(ptr) /* disabled */ -# endif -#endif /* NO_PREFETCH */ - -#define CACHELINE_SIZE 64 - -#define PREFETCH_AREA(p, s) { \ - const char* const _ptr = (const char*)(p); \ - size_t const _size = (size_t)(s); \ - size_t _pos; \ - for (_pos=0; _pos<_size; _pos+=CACHELINE_SIZE) { \ - PREFETCH_L2(_ptr + _pos); \ - } \ -} - -/* vectorization - * older GCC (pre gcc-4.3 picked as the cutoff) uses a different syntax */ -#if !defined(__INTEL_COMPILER) && !defined(__clang__) && defined(__GNUC__) -# if (__GNUC__ == 4 && __GNUC_MINOR__ > 3) || (__GNUC__ >= 5) -# define DONT_VECTORIZE __attribute__((optimize("no-tree-vectorize"))) -# else -# define DONT_VECTORIZE _Pragma("GCC optimize(\"no-tree-vectorize\")") -# endif -#else -# define DONT_VECTORIZE -#endif - -/* Tell the compiler that a branch is likely or unlikely. - * Only use these macros if it causes the compiler to generate better code. - * If you can remove a LIKELY/UNLIKELY annotation without speed changes in gcc - * and clang, please do. - */ -#if defined(__GNUC__) -#ifndef LIKELY -#define LIKELY(x) (__builtin_expect((x), 1)) -#endif -#ifndef UNLIKELY -#define UNLIKELY(x) (__builtin_expect((x), 0)) -#endif -#else -#ifndef LIKELY -#define LIKELY(x) (x) -#endif -#ifndef UNLIKELY -#define UNLIKELY(x) (x) -#endif -#endif - -#endif /* ZSTD_COMPILER_H */ - - -// LICENSE_CHANGE_END - - - -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #4 -// See the end of this file for a list - -/* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. - */ - -#ifndef MEM_H_MODULE -#define MEM_H_MODULE - -/*-**************************************** -* Dependencies -******************************************/ -#include /* size_t, ptrdiff_t */ -#include /* memcpy */ - - -/*-**************************************** -* Compiler specifics -******************************************/ -#if defined(_MSC_VER) /* Visual Studio */ -# include /* _byteswap_ulong */ -# include /* _byteswap_* */ -#endif -#if defined(__GNUC__) -# define MEM_STATIC static __inline __attribute__((unused)) -#elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) -# define MEM_STATIC static inline -#elif defined(_MSC_VER) -# define MEM_STATIC static __inline -#else -# define MEM_STATIC static /* this version may generate warnings for unused static functions; disable the relevant warning */ -#endif - -#ifndef __has_builtin -# define __has_builtin(x) 0 /* compat. with non-clang compilers */ -#endif - -/* code only tested on 32 and 64 bits systems */ -#define MEM_STATIC_ASSERT(c) { enum { MEM_static_assert = 1/(int)(!!(c)) }; } -MEM_STATIC void MEM_check(void) { MEM_STATIC_ASSERT((sizeof(size_t)==4) || (sizeof(size_t)==8)); } - -/* detects whether we are being compiled under msan */ -#if defined (__has_feature) -# if __has_feature(memory_sanitizer) -# define MEMORY_SANITIZER 1 -# endif -#endif - -/*-************************************************************** -* Basic Types -*****************************************************************/ -#if !defined (__VMS) && (defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) ) -# include - typedef uint8_t BYTE; - typedef uint16_t U16; - typedef int16_t S16; - typedef uint32_t U32; - typedef int32_t S32; - typedef uint64_t U64; - typedef int64_t S64; -#else -# include -#if CHAR_BIT != 8 -# error "this implementation requires char to be exactly 8-bit type" -#endif - typedef unsigned char BYTE; -#if USHRT_MAX != 65535 -# error "this implementation requires short to be exactly 16-bit type" -#endif - typedef unsigned short U16; - typedef signed short S16; -#if UINT_MAX != 4294967295 -# error "this implementation requires int to be exactly 32-bit type" -#endif - typedef unsigned int U32; - typedef signed int S32; -/* note : there are no limits defined for long long type in C90. - * limits exist in C99, however, in such case, is preferred */ - typedef unsigned long long U64; - typedef signed long long S64; -#endif - -namespace duckdb_zstd { - -/*-************************************************************** -* Memory I/O -*****************************************************************/ -/* MEM_FORCE_MEMORY_ACCESS : - * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable. - * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal. - * The below switch allow to select different access method for improved performance. - * Method 0 (default) : use `memcpy()`. Safe and portable. - * Method 1 : `__packed` statement. It depends on compiler extension (i.e., not portable). - * This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`. - * Method 2 : direct access. This method is portable but violate C standard. - * It can generate buggy code on targets depending on alignment. - * In some circumstances, it's the only known way to get the most performance (i.e. GCC + ARMv6) - * See http://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details. - * Prefer these methods in priority order (0 > 1 > 2) - */ -#ifndef MEM_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */ -# if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) ) -# define MEM_FORCE_MEMORY_ACCESS 2 -# elif defined(__INTEL_COMPILER) || defined(__GNUC__) || defined(__ICCARM__) -# define MEM_FORCE_MEMORY_ACCESS 1 -# endif -#endif - -MEM_STATIC unsigned MEM_32bits(void) { return sizeof(size_t)==4; } -MEM_STATIC unsigned MEM_64bits(void) { return sizeof(size_t)==8; } - -MEM_STATIC unsigned MEM_isLittleEndian(void) -{ - const union { U32 u; BYTE c[4]; } one = { 1 }; /* don't use static : performance detrimental */ - return one.c[0]; -} - -#if defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==2) - -/* violates C standard, by lying on structure alignment. -Only use if no other choice to achieve best performance on target platform */ -MEM_STATIC U16 MEM_read16(const void* memPtr) { return *(const U16*) memPtr; } -MEM_STATIC U32 MEM_read32(const void* memPtr) { return *(const U32*) memPtr; } -MEM_STATIC U64 MEM_read64(const void* memPtr) { return *(const U64*) memPtr; } -MEM_STATIC size_t MEM_readST(const void* memPtr) { return *(const size_t*) memPtr; } - -MEM_STATIC void MEM_write16(void* memPtr, U16 value) { *(U16*)memPtr = value; } -MEM_STATIC void MEM_write32(void* memPtr, U32 value) { *(U32*)memPtr = value; } -MEM_STATIC void MEM_write64(void* memPtr, U64 value) { *(U64*)memPtr = value; } - -#elif defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==1) - -/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */ -/* currently only defined for gcc and icc */ -#if defined(_MSC_VER) || (defined(__INTEL_COMPILER) && defined(WIN32)) - __pragma( pack(push, 1) ) - typedef struct { U16 v; } unalign16; - typedef struct { U32 v; } unalign32; - typedef struct { U64 v; } unalign64; - typedef struct { size_t v; } unalignArch; - __pragma( pack(pop) ) -#else - typedef struct { U16 v; } __attribute__((packed)) unalign16; - typedef struct { U32 v; } __attribute__((packed)) unalign32; - typedef struct { U64 v; } __attribute__((packed)) unalign64; - typedef struct { size_t v; } __attribute__((packed)) unalignArch; -#endif - -MEM_STATIC U16 MEM_read16(const void* ptr) { return ((const unalign16*)ptr)->v; } -MEM_STATIC U32 MEM_read32(const void* ptr) { return ((const unalign32*)ptr)->v; } -MEM_STATIC U64 MEM_read64(const void* ptr) { return ((const unalign64*)ptr)->v; } -MEM_STATIC size_t MEM_readST(const void* ptr) { return ((const unalignArch*)ptr)->v; } - -MEM_STATIC void MEM_write16(void* memPtr, U16 value) { ((unalign16*)memPtr)->v = value; } -MEM_STATIC void MEM_write32(void* memPtr, U32 value) { ((unalign32*)memPtr)->v = value; } -MEM_STATIC void MEM_write64(void* memPtr, U64 value) { ((unalign64*)memPtr)->v = value; } - -#else - -/* default method, safe and standard. - can sometimes prove slower */ - -MEM_STATIC U16 MEM_read16(const void* memPtr) -{ - U16 val; memcpy(&val, memPtr, sizeof(val)); return val; -} - -MEM_STATIC U32 MEM_read32(const void* memPtr) -{ - U32 val; memcpy(&val, memPtr, sizeof(val)); return val; -} - -MEM_STATIC U64 MEM_read64(const void* memPtr) -{ - U64 val; memcpy(&val, memPtr, sizeof(val)); return val; -} - -MEM_STATIC size_t MEM_readST(const void* memPtr) -{ - size_t val; memcpy(&val, memPtr, sizeof(val)); return val; -} - -MEM_STATIC void MEM_write16(void* memPtr, U16 value) -{ - memcpy(memPtr, &value, sizeof(value)); -} - -MEM_STATIC void MEM_write32(void* memPtr, U32 value) -{ - memcpy(memPtr, &value, sizeof(value)); -} - -MEM_STATIC void MEM_write64(void* memPtr, U64 value) -{ - memcpy(memPtr, &value, sizeof(value)); -} - -#endif /* MEM_FORCE_MEMORY_ACCESS */ - -MEM_STATIC U32 MEM_swap32(U32 in) -{ -#if defined(_MSC_VER) /* Visual Studio */ - return _byteswap_ulong(in); -#elif (defined (__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__ >= 403)) \ - || (defined(__clang__) && __has_builtin(__builtin_bswap32)) - return __builtin_bswap32(in); -#else - return ((in << 24) & 0xff000000 ) | - ((in << 8) & 0x00ff0000 ) | - ((in >> 8) & 0x0000ff00 ) | - ((in >> 24) & 0x000000ff ); -#endif -} - -MEM_STATIC U64 MEM_swap64(U64 in) -{ -#if defined(_MSC_VER) /* Visual Studio */ - return _byteswap_uint64(in); -#elif (defined (__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__ >= 403)) \ - || (defined(__clang__) && __has_builtin(__builtin_bswap64)) - return __builtin_bswap64(in); -#else - return ((in << 56) & 0xff00000000000000ULL) | - ((in << 40) & 0x00ff000000000000ULL) | - ((in << 24) & 0x0000ff0000000000ULL) | - ((in << 8) & 0x000000ff00000000ULL) | - ((in >> 8) & 0x00000000ff000000ULL) | - ((in >> 24) & 0x0000000000ff0000ULL) | - ((in >> 40) & 0x000000000000ff00ULL) | - ((in >> 56) & 0x00000000000000ffULL); -#endif -} - -MEM_STATIC size_t MEM_swapST(size_t in) -{ - if (MEM_32bits()) - return (size_t)MEM_swap32((U32)in); - else - return (size_t)MEM_swap64((U64)in); -} - -/*=== Little endian r/w ===*/ - -MEM_STATIC U16 MEM_readLE16(const void* memPtr) -{ - if (MEM_isLittleEndian()) - return MEM_read16(memPtr); - else { - const BYTE* p = (const BYTE*)memPtr; - return (U16)(p[0] + (p[1]<<8)); - } -} - -MEM_STATIC void MEM_writeLE16(void* memPtr, U16 val) -{ - if (MEM_isLittleEndian()) { - MEM_write16(memPtr, val); - } else { - BYTE* p = (BYTE*)memPtr; - p[0] = (BYTE)val; - p[1] = (BYTE)(val>>8); - } -} - -MEM_STATIC U32 MEM_readLE24(const void* memPtr) -{ - return MEM_readLE16(memPtr) + (((const BYTE*)memPtr)[2] << 16); -} - -MEM_STATIC void MEM_writeLE24(void* memPtr, U32 val) -{ - MEM_writeLE16(memPtr, (U16)val); - ((BYTE*)memPtr)[2] = (BYTE)(val>>16); -} - -MEM_STATIC U32 MEM_readLE32(const void* memPtr) -{ - if (MEM_isLittleEndian()) - return MEM_read32(memPtr); - else - return MEM_swap32(MEM_read32(memPtr)); -} - -MEM_STATIC void MEM_writeLE32(void* memPtr, U32 val32) -{ - if (MEM_isLittleEndian()) - MEM_write32(memPtr, val32); - else - MEM_write32(memPtr, MEM_swap32(val32)); -} - -MEM_STATIC U64 MEM_readLE64(const void* memPtr) -{ - if (MEM_isLittleEndian()) - return MEM_read64(memPtr); - else - return MEM_swap64(MEM_read64(memPtr)); -} - -MEM_STATIC void MEM_writeLE64(void* memPtr, U64 val64) -{ - if (MEM_isLittleEndian()) - MEM_write64(memPtr, val64); - else - MEM_write64(memPtr, MEM_swap64(val64)); -} - -MEM_STATIC size_t MEM_readLEST(const void* memPtr) -{ - if (MEM_32bits()) - return (size_t)MEM_readLE32(memPtr); - else - return (size_t)MEM_readLE64(memPtr); -} - -MEM_STATIC void MEM_writeLEST(void* memPtr, size_t val) -{ - if (MEM_32bits()) - MEM_writeLE32(memPtr, (U32)val); - else - MEM_writeLE64(memPtr, (U64)val); -} - -/*=== Big endian r/w ===*/ - -MEM_STATIC U32 MEM_readBE32(const void* memPtr) -{ - if (MEM_isLittleEndian()) - return MEM_swap32(MEM_read32(memPtr)); - else - return MEM_read32(memPtr); -} - -MEM_STATIC void MEM_writeBE32(void* memPtr, U32 val32) -{ - if (MEM_isLittleEndian()) - MEM_write32(memPtr, MEM_swap32(val32)); - else - MEM_write32(memPtr, val32); -} - -MEM_STATIC U64 MEM_readBE64(const void* memPtr) -{ - if (MEM_isLittleEndian()) - return MEM_swap64(MEM_read64(memPtr)); - else - return MEM_read64(memPtr); -} - -MEM_STATIC void MEM_writeBE64(void* memPtr, U64 val64) -{ - if (MEM_isLittleEndian()) - MEM_write64(memPtr, MEM_swap64(val64)); - else - MEM_write64(memPtr, val64); -} - -MEM_STATIC size_t MEM_readBEST(const void* memPtr) -{ - if (MEM_32bits()) - return (size_t)MEM_readBE32(memPtr); - else - return (size_t)MEM_readBE64(memPtr); -} - -MEM_STATIC void MEM_writeBEST(void* memPtr, size_t val) -{ - if (MEM_32bits()) - MEM_writeBE32(memPtr, (U32)val); - else - MEM_writeBE64(memPtr, (U64)val); -} - -} - -#endif /* MEM_H_MODULE */ - - -// LICENSE_CHANGE_END - /* U32, U16, etc. */ - - -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #4 -// See the end of this file for a list - -/* ****************************************************************** - * debug - * Part of FSE library - * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc. - * - * You can contact the author at : - * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. -****************************************************************** */ - - -/* - * The purpose of this header is to enable debug functions. - * They regroup assert(), DEBUGLOG() and RAWLOG() for run-time, - * and DEBUG_STATIC_ASSERT() for compile-time. - * - * By default, DEBUGLEVEL==0, which means run-time debug is disabled. - * - * Level 1 enables assert() only. - * Starting level 2, traces can be generated and pushed to stderr. - * The higher the level, the more verbose the traces. - * - * It's possible to dynamically adjust level using variable g_debug_level, - * which is only declared if DEBUGLEVEL>=2, - * and is a global variable, not multi-thread protected (use with care) - */ - -#ifndef DEBUG_H_12987983217 -#define DEBUG_H_12987983217 - -#if defined (__cplusplus) -extern "C" { -#endif - - -/* static assert is triggered at compile time, leaving no runtime artefact. - * static assert only works with compile-time constants. - * Also, this variant can only be used inside a function. */ -#define DEBUG_STATIC_ASSERT(c) (void)sizeof(char[(c) ? 1 : -1]) - - -/* DEBUGLEVEL is expected to be defined externally, - * typically through compiler command line. - * Value must be a number. */ -#ifndef DEBUGLEVEL -# define DEBUGLEVEL 0 -#endif - - -/* DEBUGFILE can be defined externally, - * typically through compiler command line. - * note : currently useless. - * Value must be stderr or stdout */ -#ifndef DEBUGFILE -# define DEBUGFILE stderr -#endif - - -/* recommended values for DEBUGLEVEL : - * 0 : release mode, no debug, all run-time checks disabled - * 1 : enables assert() only, no display - * 2 : reserved, for currently active debug path - * 3 : events once per object lifetime (CCtx, CDict, etc.) - * 4 : events once per frame - * 5 : events once per block - * 6 : events once per sequence (verbose) - * 7+: events at every position (*very* verbose) - * - * It's generally inconvenient to output traces > 5. - * In which case, it's possible to selectively trigger high verbosity levels - * by modifying g_debug_level. - */ - -#if (DEBUGLEVEL>=1) -# include -#else -# ifndef assert /* assert may be already defined, due to prior #include */ -# define assert(condition) ((void)0) /* disable assert (default) */ -# endif -#endif - -#if (DEBUGLEVEL>=2) -# include -extern int g_debuglevel; /* the variable is only declared, - it actually lives in debug.c, - and is shared by the whole process. - It's not thread-safe. - It's useful when enabling very verbose levels - on selective conditions (such as position in src) */ - -# define RAWLOG(l, ...) { \ - if (l<=g_debuglevel) { \ - fprintf(stderr, __VA_ARGS__); \ - } } -# define DEBUGLOG(l, ...) { \ - if (l<=g_debuglevel) { \ - fprintf(stderr, __FILE__ ": " __VA_ARGS__); \ - fprintf(stderr, " \n"); \ - } } -#else -# define RAWLOG(l, ...) {} /* disabled */ -# define DEBUGLOG(l, ...) {} /* disabled */ -#endif - - -#if defined (__cplusplus) -} -#endif - -#endif /* DEBUG_H_12987983217 */ - - -// LICENSE_CHANGE_END - /* assert, DEBUGLOG */ - - -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #4 -// See the end of this file for a list - -/* ****************************************************************** - * hist : Histogram functions - * part of Finite State Entropy project - * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc. - * - * You can contact the author at : - * - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy - * - Public forum : https://groups.google.com/forum/#!forum/lz4c - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. -****************************************************************** */ - -/* --- dependencies --- */ -#include /* size_t */ - - -namespace duckdb_zstd { -/* --- simple histogram functions --- */ - -/*! HIST_count(): - * Provides the precise count of each byte within a table 'count'. - * 'count' is a table of unsigned int, of minimum size (*maxSymbolValuePtr+1). - * Updates *maxSymbolValuePtr with actual largest symbol value detected. - * @return : count of the most frequent symbol (which isn't identified). - * or an error code, which can be tested using HIST_isError(). - * note : if return == srcSize, there is only one symbol. - */ -size_t HIST_count(unsigned* count, unsigned* maxSymbolValuePtr, - const void* src, size_t srcSize); - -unsigned HIST_isError(size_t code); /**< tells if a return value is an error code */ - - -/* --- advanced histogram functions --- */ - -#define HIST_WKSP_SIZE_U32 1024 -#define HIST_WKSP_SIZE (HIST_WKSP_SIZE_U32 * sizeof(unsigned)) -/** HIST_count_wksp() : - * Same as HIST_count(), but using an externally provided scratch buffer. - * Benefit is this function will use very little stack space. - * `workSpace` is a writable buffer which must be 4-bytes aligned, - * `workSpaceSize` must be >= HIST_WKSP_SIZE - */ -size_t HIST_count_wksp(unsigned* count, unsigned* maxSymbolValuePtr, - const void* src, size_t srcSize, - void* workSpace, size_t workSpaceSize); - -/** HIST_countFast() : - * same as HIST_count(), but blindly trusts that all byte values within src are <= *maxSymbolValuePtr. - * This function is unsafe, and will segfault if any value within `src` is `> *maxSymbolValuePtr` - */ -size_t HIST_countFast(unsigned* count, unsigned* maxSymbolValuePtr, - const void* src, size_t srcSize); - -/** HIST_countFast_wksp() : - * Same as HIST_countFast(), but using an externally provided scratch buffer. - * `workSpace` is a writable buffer which must be 4-bytes aligned, - * `workSpaceSize` must be >= HIST_WKSP_SIZE - */ -size_t HIST_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr, - const void* src, size_t srcSize, - void* workSpace, size_t workSpaceSize); - -/*! HIST_count_simple() : - * Same as HIST_countFast(), this function is unsafe, - * and will segfault if any value within `src` is `> *maxSymbolValuePtr`. - * It is also a bit slower for large inputs. - * However, it does not need any additional memory (not even on stack). - * @return : count of the most frequent symbol. - * Note this function doesn't produce any error (i.e. it must succeed). - */ -unsigned HIST_count_simple(unsigned* count, unsigned* maxSymbolValuePtr, - const void* src, size_t srcSize); - -} - - -// LICENSE_CHANGE_END - /* HIST_count_wksp */ - - -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #4 -// See the end of this file for a list - -/* ****************************************************************** - * bitstream - * Part of FSE library - * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc. - * - * You can contact the author at : - * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. -****************************************************************** */ -#ifndef BITSTREAM_H_MODULE -#define BITSTREAM_H_MODULE - -/* -* This API consists of small unitary functions, which must be inlined for best performance. -* Since link-time-optimization is not available for all compilers, -* these functions are defined into a .h to be included. -*/ - -/*-**************************************** -* Dependencies -******************************************/ - /* unaligned access routines */ - /* UNLIKELY() */ - /* assert(), DEBUGLOG(), RAWLOG() */ - - -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #4 -// See the end of this file for a list - -/* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. - */ - -/* Note : this module is expected to remain private, do not expose it */ - -#ifndef ERROR_H_MODULE -#define ERROR_H_MODULE - -/* **************************************** -* Dependencies -******************************************/ -#include /* size_t */ - - -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #4 -// See the end of this file for a list - -/* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. - */ - -#ifndef ZSTD_ERRORS_H_398273423 -#define ZSTD_ERRORS_H_398273423 - -/*===== dependency =====*/ -#include /* size_t */ - - -/* ===== ZSTDERRORLIB_API : control library symbols visibility ===== */ -#ifndef ZSTDERRORLIB_VISIBILITY -# if defined(__GNUC__) && (__GNUC__ >= 4) -# define ZSTDERRORLIB_VISIBILITY __attribute__ ((visibility ("default"))) -# else -# define ZSTDERRORLIB_VISIBILITY -# endif -#endif -#if defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1) -# define ZSTDERRORLIB_API __declspec(dllexport) ZSTDERRORLIB_VISIBILITY -#elif defined(ZSTD_DLL_IMPORT) && (ZSTD_DLL_IMPORT==1) -# define ZSTDERRORLIB_API __declspec(dllimport) ZSTDERRORLIB_VISIBILITY /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/ -#else -# define ZSTDERRORLIB_API ZSTDERRORLIB_VISIBILITY -#endif -namespace duckdb_zstd { -/*-********************************************* - * Error codes list - *-********************************************* - * Error codes _values_ are pinned down since v1.3.1 only. - * Therefore, don't rely on values if you may link to any version < v1.3.1. - * - * Only values < 100 are considered stable. - * - * note 1 : this API shall be used with static linking only. - * dynamic linking is not yet officially supported. - * note 2 : Prefer relying on the enum than on its value whenever possible - * This is the only supported way to use the error list < v1.3.1 - * note 3 : ZSTD_isError() is always correct, whatever the library version. - **********************************************/ -typedef enum { - ZSTD_error_no_error = 0, - ZSTD_error_GENERIC = 1, - ZSTD_error_prefix_unknown = 10, - ZSTD_error_version_unsupported = 12, - ZSTD_error_frameParameter_unsupported = 14, - ZSTD_error_frameParameter_windowTooLarge = 16, - ZSTD_error_corruption_detected = 20, - ZSTD_error_checksum_wrong = 22, - ZSTD_error_dictionary_corrupted = 30, - ZSTD_error_dictionary_wrong = 32, - ZSTD_error_dictionaryCreation_failed = 34, - ZSTD_error_parameter_unsupported = 40, - ZSTD_error_parameter_outOfBound = 42, - ZSTD_error_tableLog_tooLarge = 44, - ZSTD_error_maxSymbolValue_tooLarge = 46, - ZSTD_error_maxSymbolValue_tooSmall = 48, - ZSTD_error_stage_wrong = 60, - ZSTD_error_init_missing = 62, - ZSTD_error_memory_allocation = 64, - ZSTD_error_workSpace_tooSmall= 66, - ZSTD_error_dstSize_tooSmall = 70, - ZSTD_error_srcSize_wrong = 72, - ZSTD_error_dstBuffer_null = 74, - /* following error codes are __NOT STABLE__, they can be removed or changed in future versions */ - ZSTD_error_frameIndex_tooLarge = 100, - ZSTD_error_seekableIO = 102, - ZSTD_error_dstBuffer_wrong = 104, - ZSTD_error_maxCode = 120 /* never EVER use this value directly, it can change in future versions! Use ZSTD_isError() instead */ -} ZSTD_ErrorCode; - -/*! ZSTD_getErrorCode() : - convert a `size_t` function result into a `ZSTD_ErrorCode` enum type, - which can be used to compare with enum list published above */ -ZSTDERRORLIB_API ZSTD_ErrorCode ZSTD_getErrorCode(size_t functionResult); -ZSTDERRORLIB_API const char* ZSTD_getErrorString(ZSTD_ErrorCode code); /**< Same as ZSTD_getErrorName, but using a `ZSTD_ErrorCode` enum argument */ - -} - -#endif /* ZSTD_ERRORS_H_398273423 */ - - -// LICENSE_CHANGE_END - /* enum list */ - -namespace duckdb_zstd { -/* **************************************** -* Compiler-specific -******************************************/ -#if defined(__GNUC__) -# define ERR_STATIC static __attribute__((unused)) -#elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) -# define ERR_STATIC static inline -#elif defined(_MSC_VER) -# define ERR_STATIC static __inline -#else -# define ERR_STATIC static /* this version may generate warnings for unused static functions; disable the relevant warning */ -#endif - - -/*-**************************************** -* Customization (error_public.h) -******************************************/ -typedef ZSTD_ErrorCode ERR_enum; -#define PREFIX(name) ZSTD_error_##name - - -/*-**************************************** -* Error codes handling -******************************************/ -#undef ERROR /* already defined on Visual Studio */ -#define ERROR(name) ZSTD_ERROR(name) -#define ZSTD_ERROR(name) ((size_t)-PREFIX(name)) - -ERR_STATIC unsigned ERR_isError(size_t code) { return (code > ERROR(maxCode)); } - -ERR_STATIC ERR_enum ERR_getErrorCode(size_t code) { if (!ERR_isError(code)) return (ERR_enum)0; return (ERR_enum) (0-code); } - -/* check and forward error code */ -#define CHECK_V_F(e, f) size_t const e = f; if (ERR_isError(e)) return e -#define CHECK_F(f) { CHECK_V_F(_var_err__, f); } - - -/*-**************************************** -* Error Strings -******************************************/ - -const char* ERR_getErrorString(ERR_enum code); /* error_private.c */ - -ERR_STATIC const char* ERR_getErrorName(size_t code) -{ - return ERR_getErrorString(ERR_getErrorCode(code)); -} - -} - -#endif /* ERROR_H_MODULE */ - - -// LICENSE_CHANGE_END - /* error codes and messages */ - - -/*========================================= -* Target specific -=========================================*/ -#if defined(__BMI__) && defined(__GNUC__) -# include /* support for bextr (experimental) */ -#elif defined(__ICCARM__) -# include -#endif - -#define STREAM_ACCUMULATOR_MIN_32 25 -#define STREAM_ACCUMULATOR_MIN_64 57 -#define STREAM_ACCUMULATOR_MIN ((U32)(MEM_32bits() ? STREAM_ACCUMULATOR_MIN_32 : STREAM_ACCUMULATOR_MIN_64)) - -namespace duckdb_zstd { - -/*-****************************************** -* bitStream encoding API (write forward) -********************************************/ -/* bitStream can mix input from multiple sources. - * A critical property of these streams is that they encode and decode in **reverse** direction. - * So the first bit sequence you add will be the last to be read, like a LIFO stack. - */ -typedef struct { - size_t bitContainer; - unsigned bitPos; - char* startPtr; - char* ptr; - char* endPtr; -} BIT_CStream_t; - -MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC, void* dstBuffer, size_t dstCapacity); -MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC, size_t value, unsigned nbBits); -MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC); -MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC); - -/* Start with initCStream, providing the size of buffer to write into. -* bitStream will never write outside of this buffer. -* `dstCapacity` must be >= sizeof(bitD->bitContainer), otherwise @return will be an error code. -* -* bits are first added to a local register. -* Local register is size_t, hence 64-bits on 64-bits systems, or 32-bits on 32-bits systems. -* Writing data into memory is an explicit operation, performed by the flushBits function. -* Hence keep track how many bits are potentially stored into local register to avoid register overflow. -* After a flushBits, a maximum of 7 bits might still be stored into local register. -* -* Avoid storing elements of more than 24 bits if you want compatibility with 32-bits bitstream readers. -* -* Last operation is to close the bitStream. -* The function returns the final size of CStream in bytes. -* If data couldn't fit into `dstBuffer`, it will return a 0 ( == not storable) -*/ - - -/*-******************************************** -* bitStream decoding API (read backward) -**********************************************/ -typedef struct { - size_t bitContainer; - unsigned bitsConsumed; - const char* ptr; - const char* start; - const char* limitPtr; -} BIT_DStream_t; - -typedef enum { BIT_DStream_unfinished = 0, - BIT_DStream_endOfBuffer = 1, - BIT_DStream_completed = 2, - BIT_DStream_overflow = 3 } BIT_DStream_status; /* result of BIT_reloadDStream() */ - /* 1,2,4,8 would be better for bitmap combinations, but slows down performance a bit ... :( */ - -MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize); -MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits); -MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD); -MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* bitD); - - -/* Start by invoking BIT_initDStream(). -* A chunk of the bitStream is then stored into a local register. -* Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (size_t). -* You can then retrieve bitFields stored into the local register, **in reverse order**. -* Local register is explicitly reloaded from memory by the BIT_reloadDStream() method. -* A reload guarantee a minimum of ((8*sizeof(bitD->bitContainer))-7) bits when its result is BIT_DStream_unfinished. -* Otherwise, it can be less than that, so proceed accordingly. -* Checking if DStream has reached its end can be performed with BIT_endOfDStream(). -*/ - - -/*-**************************************** -* unsafe API -******************************************/ -MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC, size_t value, unsigned nbBits); -/* faster, but works only if value is "clean", meaning all high bits above nbBits are 0 */ - -MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC); -/* unsafe version; does not check buffer overflow */ - -MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits); -/* faster, but works only if nbBits >= 1 */ - - - -/*-************************************************************** -* Internal functions -****************************************************************/ -MEM_STATIC unsigned BIT_highbit32 (U32 val) -{ - assert(val != 0); - { -# if defined(_MSC_VER) /* Visual */ - unsigned long r=0; - return _BitScanReverse ( &r, val ) ? (unsigned)r : 0; -# elif defined(__GNUC__) && (__GNUC__ >= 3) /* Use GCC Intrinsic */ - return __builtin_clz (val) ^ 31; -# elif defined(__ICCARM__) /* IAR Intrinsic */ - return 31 - __CLZ(val); -# else /* Software version */ - static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, - 11, 14, 16, 18, 22, 25, 3, 30, - 8, 12, 20, 28, 15, 17, 24, 7, - 19, 27, 23, 6, 26, 5, 4, 31 }; - U32 v = val; - v |= v >> 1; - v |= v >> 2; - v |= v >> 4; - v |= v >> 8; - v |= v >> 16; - return DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27]; -# endif - } -} - -/*===== Local Constants =====*/ -static const unsigned BIT_mask[] = { - 0, 1, 3, 7, 0xF, 0x1F, - 0x3F, 0x7F, 0xFF, 0x1FF, 0x3FF, 0x7FF, - 0xFFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF, 0x1FFFF, - 0x3FFFF, 0x7FFFF, 0xFFFFF, 0x1FFFFF, 0x3FFFFF, 0x7FFFFF, - 0xFFFFFF, 0x1FFFFFF, 0x3FFFFFF, 0x7FFFFFF, 0xFFFFFFF, 0x1FFFFFFF, - 0x3FFFFFFF, 0x7FFFFFFF}; /* up to 31 bits */ -#define BIT_MASK_SIZE (sizeof(BIT_mask) / sizeof(BIT_mask[0])) - -/*-************************************************************** -* bitStream encoding -****************************************************************/ -/*! BIT_initCStream() : - * `dstCapacity` must be > sizeof(size_t) - * @return : 0 if success, - * otherwise an error code (can be tested using ERR_isError()) */ -MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC, - void* startPtr, size_t dstCapacity) -{ - bitC->bitContainer = 0; - bitC->bitPos = 0; - bitC->startPtr = (char*)startPtr; - bitC->ptr = bitC->startPtr; - bitC->endPtr = bitC->startPtr + dstCapacity - sizeof(bitC->bitContainer); - if (dstCapacity <= sizeof(bitC->bitContainer)) return ERROR(dstSize_tooSmall); - return 0; -} - -/*! BIT_addBits() : - * can add up to 31 bits into `bitC`. - * Note : does not check for register overflow ! */ -MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC, - size_t value, unsigned nbBits) -{ - MEM_STATIC_ASSERT(BIT_MASK_SIZE == 32); - assert(nbBits < BIT_MASK_SIZE); - assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8); - bitC->bitContainer |= (value & BIT_mask[nbBits]) << bitC->bitPos; - bitC->bitPos += nbBits; -} - -/*! BIT_addBitsFast() : - * works only if `value` is _clean_, - * meaning all high bits above nbBits are 0 */ -MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC, - size_t value, unsigned nbBits) -{ - assert((value>>nbBits) == 0); - assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8); - bitC->bitContainer |= value << bitC->bitPos; - bitC->bitPos += nbBits; -} - -/*! BIT_flushBitsFast() : - * assumption : bitContainer has not overflowed - * unsafe version; does not check buffer overflow */ -MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC) -{ - size_t const nbBytes = bitC->bitPos >> 3; - assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8); - assert(bitC->ptr <= bitC->endPtr); - MEM_writeLEST(bitC->ptr, bitC->bitContainer); - bitC->ptr += nbBytes; - bitC->bitPos &= 7; - bitC->bitContainer >>= nbBytes*8; -} - -/*! BIT_flushBits() : - * assumption : bitContainer has not overflowed - * safe version; check for buffer overflow, and prevents it. - * note : does not signal buffer overflow. - * overflow will be revealed later on using BIT_closeCStream() */ -MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC) -{ - size_t const nbBytes = bitC->bitPos >> 3; - assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8); - assert(bitC->ptr <= bitC->endPtr); - MEM_writeLEST(bitC->ptr, bitC->bitContainer); - bitC->ptr += nbBytes; - if (bitC->ptr > bitC->endPtr) bitC->ptr = bitC->endPtr; - bitC->bitPos &= 7; - bitC->bitContainer >>= nbBytes*8; -} - -/*! BIT_closeCStream() : - * @return : size of CStream, in bytes, - * or 0 if it could not fit into dstBuffer */ -MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC) -{ - BIT_addBitsFast(bitC, 1, 1); /* endMark */ - BIT_flushBits(bitC); - if (bitC->ptr >= bitC->endPtr) return 0; /* overflow detected */ - return (bitC->ptr - bitC->startPtr) + (bitC->bitPos > 0); -} - - -/*-******************************************************** -* bitStream decoding -**********************************************************/ -/*! BIT_initDStream() : - * Initialize a BIT_DStream_t. - * `bitD` : a pointer to an already allocated BIT_DStream_t structure. - * `srcSize` must be the *exact* size of the bitStream, in bytes. - * @return : size of stream (== srcSize), or an errorCode if a problem is detected - */ -MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize) -{ - if (srcSize < 1) { memset(bitD, 0, sizeof(*bitD)); return ERROR(srcSize_wrong); } - - bitD->start = (const char*)srcBuffer; - bitD->limitPtr = bitD->start + sizeof(bitD->bitContainer); - - if (srcSize >= sizeof(bitD->bitContainer)) { /* normal case */ - bitD->ptr = (const char*)srcBuffer + srcSize - sizeof(bitD->bitContainer); - bitD->bitContainer = MEM_readLEST(bitD->ptr); - { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1]; - bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0; /* ensures bitsConsumed is always set */ - if (lastByte == 0) return ERROR(GENERIC); /* endMark not present */ } - } else { - bitD->ptr = bitD->start; - bitD->bitContainer = *(const BYTE*)(bitD->start); - switch(srcSize) - { - case 7: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[6]) << (sizeof(bitD->bitContainer)*8 - 16); - /* fall-through */ - - case 6: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[5]) << (sizeof(bitD->bitContainer)*8 - 24); - /* fall-through */ - - case 5: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[4]) << (sizeof(bitD->bitContainer)*8 - 32); - /* fall-through */ - - case 4: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[3]) << 24; - /* fall-through */ - - case 3: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[2]) << 16; - /* fall-through */ - - case 2: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[1]) << 8; - /* fall-through */ - - default: break; - } - { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1]; - bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0; - if (lastByte == 0) return ERROR(corruption_detected); /* endMark not present */ - } - bitD->bitsConsumed += (U32)(sizeof(bitD->bitContainer) - srcSize)*8; - } - - return srcSize; -} - -MEM_STATIC size_t BIT_getUpperBits(size_t bitContainer, U32 const start) -{ - return bitContainer >> start; -} - -MEM_STATIC size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 const nbBits) -{ - U32 const regMask = sizeof(bitContainer)*8 - 1; - /* if start > regMask, bitstream is corrupted, and result is undefined */ - assert(nbBits < BIT_MASK_SIZE); - return (bitContainer >> (start & regMask)) & BIT_mask[nbBits]; -} - -MEM_STATIC size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits) -{ - assert(nbBits < BIT_MASK_SIZE); - return bitContainer & BIT_mask[nbBits]; -} - -/*! BIT_lookBits() : - * Provides next n bits from local register. - * local register is not modified. - * On 32-bits, maxNbBits==24. - * On 64-bits, maxNbBits==56. - * @return : value extracted */ -MEM_STATIC size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits) -{ - /* arbitrate between double-shift and shift+mask */ -#if 1 - /* if bitD->bitsConsumed + nbBits > sizeof(bitD->bitContainer)*8, - * bitstream is likely corrupted, and result is undefined */ - return BIT_getMiddleBits(bitD->bitContainer, (sizeof(bitD->bitContainer)*8) - bitD->bitsConsumed - nbBits, nbBits); -#else - /* this code path is slower on my os-x laptop */ - U32 const regMask = sizeof(bitD->bitContainer)*8 - 1; - return ((bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> 1) >> ((regMask-nbBits) & regMask); -#endif -} - -/*! BIT_lookBitsFast() : - * unsafe version; only works if nbBits >= 1 */ -MEM_STATIC size_t BIT_lookBitsFast(const BIT_DStream_t* bitD, U32 nbBits) -{ - U32 const regMask = sizeof(bitD->bitContainer)*8 - 1; - assert(nbBits >= 1); - return (bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> (((regMask+1)-nbBits) & regMask); -} - -MEM_STATIC void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits) -{ - bitD->bitsConsumed += nbBits; -} - -/*! BIT_readBits() : - * Read (consume) next n bits from local register and update. - * Pay attention to not read more than nbBits contained into local register. - * @return : extracted value. */ -MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits) -{ - size_t const value = BIT_lookBits(bitD, nbBits); - BIT_skipBits(bitD, nbBits); - return value; -} - -/*! BIT_readBitsFast() : - * unsafe version; only works only if nbBits >= 1 */ -MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits) -{ - size_t const value = BIT_lookBitsFast(bitD, nbBits); - assert(nbBits >= 1); - BIT_skipBits(bitD, nbBits); - return value; -} - -/*! BIT_reloadDStreamFast() : - * Similar to BIT_reloadDStream(), but with two differences: - * 1. bitsConsumed <= sizeof(bitD->bitContainer)*8 must hold! - * 2. Returns BIT_DStream_overflow when bitD->ptr < bitD->limitPtr, at this - * point you must use BIT_reloadDStream() to reload. - */ -MEM_STATIC BIT_DStream_status BIT_reloadDStreamFast(BIT_DStream_t* bitD) -{ - if (UNLIKELY(bitD->ptr < bitD->limitPtr)) - return BIT_DStream_overflow; - assert(bitD->bitsConsumed <= sizeof(bitD->bitContainer)*8); - bitD->ptr -= bitD->bitsConsumed >> 3; - bitD->bitsConsumed &= 7; - bitD->bitContainer = MEM_readLEST(bitD->ptr); - return BIT_DStream_unfinished; -} - -/*! BIT_reloadDStream() : - * Refill `bitD` from buffer previously set in BIT_initDStream() . - * This function is safe, it guarantees it will not read beyond src buffer. - * @return : status of `BIT_DStream_t` internal register. - * when status == BIT_DStream_unfinished, internal register is filled with at least 25 or 57 bits */ -MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD) -{ - if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8)) /* overflow detected, like end of stream */ - return BIT_DStream_overflow; - - if (bitD->ptr >= bitD->limitPtr) { - return BIT_reloadDStreamFast(bitD); - } - if (bitD->ptr == bitD->start) { - if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return BIT_DStream_endOfBuffer; - return BIT_DStream_completed; - } - /* start < ptr < limitPtr */ - { U32 nbBytes = bitD->bitsConsumed >> 3; - BIT_DStream_status result = BIT_DStream_unfinished; - if (bitD->ptr - nbBytes < bitD->start) { - nbBytes = (U32)(bitD->ptr - bitD->start); /* ptr > start */ - result = BIT_DStream_endOfBuffer; - } - bitD->ptr -= nbBytes; - bitD->bitsConsumed -= nbBytes*8; - bitD->bitContainer = MEM_readLEST(bitD->ptr); /* reminder : srcSize > sizeof(bitD->bitContainer), otherwise bitD->ptr == bitD->start */ - return result; - } -} - -/*! BIT_endOfDStream() : - * @return : 1 if DStream has _exactly_ reached its end (all bits consumed). - */ -MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* DStream) -{ - return ((DStream->ptr == DStream->start) && (DStream->bitsConsumed == sizeof(DStream->bitContainer)*8)); -} - -} - -#endif /* BITSTREAM_H_MODULE */ - - -// LICENSE_CHANGE_END - - - -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #4 -// See the end of this file for a list - -/* ****************************************************************** - * FSE : Finite State Entropy codec - * Public Prototypes declaration - * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc. - * - * You can contact the author at : - * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. -****************************************************************** */ - -#ifndef FSE_H -#define FSE_H - - -/*-***************************************** -* Dependencies -******************************************/ -#include /* size_t, ptrdiff_t */ - - -namespace duckdb_zstd { -/*-***************************************** -* FSE_PUBLIC_API : control library symbols visibility -******************************************/ -#if defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1) && defined(__GNUC__) && (__GNUC__ >= 4) -# define FSE_PUBLIC_API __attribute__ ((visibility ("default"))) -#elif defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1) /* Visual expected */ -# define FSE_PUBLIC_API __declspec(dllexport) -#elif defined(FSE_DLL_IMPORT) && (FSE_DLL_IMPORT==1) -# define FSE_PUBLIC_API __declspec(dllimport) /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/ -#else -# define FSE_PUBLIC_API -#endif - -/*------ Version ------*/ -#define FSE_VERSION_MAJOR 0 -#define FSE_VERSION_MINOR 9 -#define FSE_VERSION_RELEASE 0 - -#define FSE_LIB_VERSION FSE_VERSION_MAJOR.FSE_VERSION_MINOR.FSE_VERSION_RELEASE -#define FSE_QUOTE(str) #str -#define FSE_EXPAND_AND_QUOTE(str) FSE_QUOTE(str) -#define FSE_VERSION_STRING FSE_EXPAND_AND_QUOTE(FSE_LIB_VERSION) - -#define FSE_VERSION_NUMBER (FSE_VERSION_MAJOR *100*100 + FSE_VERSION_MINOR *100 + FSE_VERSION_RELEASE) -FSE_PUBLIC_API unsigned FSE_versionNumber(void); /**< library version number; to be used when checking dll version */ - - -/*-**************************************** -* FSE simple functions -******************************************/ -/*! FSE_compress() : - Compress content of buffer 'src', of size 'srcSize', into destination buffer 'dst'. - 'dst' buffer must be already allocated. Compression runs faster is dstCapacity >= FSE_compressBound(srcSize). - @return : size of compressed data (<= dstCapacity). - Special values : if return == 0, srcData is not compressible => Nothing is stored within dst !!! - if return == 1, srcData is a single byte symbol * srcSize times. Use RLE compression instead. - if FSE_isError(return), compression failed (more details using FSE_getErrorName()) -*/ -FSE_PUBLIC_API size_t FSE_compress(void* dst, size_t dstCapacity, - const void* src, size_t srcSize); - -/*! FSE_decompress(): - Decompress FSE data from buffer 'cSrc', of size 'cSrcSize', - into already allocated destination buffer 'dst', of size 'dstCapacity'. - @return : size of regenerated data (<= maxDstSize), - or an error code, which can be tested using FSE_isError() . - - ** Important ** : FSE_decompress() does not decompress non-compressible nor RLE data !!! - Why ? : making this distinction requires a header. - Header management is intentionally delegated to the user layer, which can better manage special cases. -*/ -FSE_PUBLIC_API size_t FSE_decompress(void* dst, size_t dstCapacity, - const void* cSrc, size_t cSrcSize); - - -/*-***************************************** -* Tool functions -******************************************/ -FSE_PUBLIC_API size_t FSE_compressBound(size_t size); /* maximum compressed size */ - -/* Error Management */ -FSE_PUBLIC_API unsigned FSE_isError(size_t code); /* tells if a return value is an error code */ -FSE_PUBLIC_API const char* FSE_getErrorName(size_t code); /* provides error code string (useful for debugging) */ - - -/*-***************************************** -* FSE advanced functions -******************************************/ -/*! FSE_compress2() : - Same as FSE_compress(), but allows the selection of 'maxSymbolValue' and 'tableLog' - Both parameters can be defined as '0' to mean : use default value - @return : size of compressed data - Special values : if return == 0, srcData is not compressible => Nothing is stored within cSrc !!! - if return == 1, srcData is a single byte symbol * srcSize times. Use RLE compression. - if FSE_isError(return), it's an error code. -*/ -FSE_PUBLIC_API size_t FSE_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog); - - -/*-***************************************** -* FSE detailed API -******************************************/ -/*! -FSE_compress() does the following: -1. count symbol occurrence from source[] into table count[] (see hist.h) -2. normalize counters so that sum(count[]) == Power_of_2 (2^tableLog) -3. save normalized counters to memory buffer using writeNCount() -4. build encoding table 'CTable' from normalized counters -5. encode the data stream using encoding table 'CTable' - -FSE_decompress() does the following: -1. read normalized counters with readNCount() -2. build decoding table 'DTable' from normalized counters -3. decode the data stream using decoding table 'DTable' - -The following API allows targeting specific sub-functions for advanced tasks. -For example, it's possible to compress several blocks using the same 'CTable', -or to save and provide normalized distribution using external method. -*/ - -/* *** COMPRESSION *** */ - -/*! FSE_optimalTableLog(): - dynamically downsize 'tableLog' when conditions are met. - It saves CPU time, by using smaller tables, while preserving or even improving compression ratio. - @return : recommended tableLog (necessarily <= 'maxTableLog') */ -FSE_PUBLIC_API unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue); - -/*! FSE_normalizeCount(): - normalize counts so that sum(count[]) == Power_of_2 (2^tableLog) - 'normalizedCounter' is a table of short, of minimum size (maxSymbolValue+1). - @return : tableLog, - or an errorCode, which can be tested using FSE_isError() */ -FSE_PUBLIC_API size_t FSE_normalizeCount(short* normalizedCounter, unsigned tableLog, - const unsigned* count, size_t srcSize, unsigned maxSymbolValue); - -/*! FSE_NCountWriteBound(): - Provides the maximum possible size of an FSE normalized table, given 'maxSymbolValue' and 'tableLog'. - Typically useful for allocation purpose. */ -FSE_PUBLIC_API size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog); - -/*! FSE_writeNCount(): - Compactly save 'normalizedCounter' into 'buffer'. - @return : size of the compressed table, - or an errorCode, which can be tested using FSE_isError(). */ -FSE_PUBLIC_API size_t FSE_writeNCount (void* buffer, size_t bufferSize, - const short* normalizedCounter, - unsigned maxSymbolValue, unsigned tableLog); - -/*! Constructor and Destructor of FSE_CTable. - Note that FSE_CTable size depends on 'tableLog' and 'maxSymbolValue' */ -typedef unsigned FSE_CTable; /* don't allocate that. It's only meant to be more restrictive than void* */ -FSE_PUBLIC_API FSE_CTable* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog); -FSE_PUBLIC_API void FSE_freeCTable (FSE_CTable* ct); - -/*! FSE_buildCTable(): - Builds `ct`, which must be already allocated, using FSE_createCTable(). - @return : 0, or an errorCode, which can be tested using FSE_isError() */ -FSE_PUBLIC_API size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog); - -/*! FSE_compress_usingCTable(): - Compress `src` using `ct` into `dst` which must be already allocated. - @return : size of compressed data (<= `dstCapacity`), - or 0 if compressed data could not fit into `dst`, - or an errorCode, which can be tested using FSE_isError() */ -FSE_PUBLIC_API size_t FSE_compress_usingCTable (void* dst, size_t dstCapacity, const void* src, size_t srcSize, const FSE_CTable* ct); - -/*! -Tutorial : ----------- -The first step is to count all symbols. FSE_count() does this job very fast. -Result will be saved into 'count', a table of unsigned int, which must be already allocated, and have 'maxSymbolValuePtr[0]+1' cells. -'src' is a table of bytes of size 'srcSize'. All values within 'src' MUST be <= maxSymbolValuePtr[0] -maxSymbolValuePtr[0] will be updated, with its real value (necessarily <= original value) -FSE_count() will return the number of occurrence of the most frequent symbol. -This can be used to know if there is a single symbol within 'src', and to quickly evaluate its compressibility. -If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError()). - -The next step is to normalize the frequencies. -FSE_normalizeCount() will ensure that sum of frequencies is == 2 ^'tableLog'. -It also guarantees a minimum of 1 to any Symbol with frequency >= 1. -You can use 'tableLog'==0 to mean "use default tableLog value". -If you are unsure of which tableLog value to use, you can ask FSE_optimalTableLog(), -which will provide the optimal valid tableLog given sourceSize, maxSymbolValue, and a user-defined maximum (0 means "default"). - -The result of FSE_normalizeCount() will be saved into a table, -called 'normalizedCounter', which is a table of signed short. -'normalizedCounter' must be already allocated, and have at least 'maxSymbolValue+1' cells. -The return value is tableLog if everything proceeded as expected. -It is 0 if there is a single symbol within distribution. -If there is an error (ex: invalid tableLog value), the function will return an ErrorCode (which can be tested using FSE_isError()). - -'normalizedCounter' can be saved in a compact manner to a memory area using FSE_writeNCount(). -'buffer' must be already allocated. -For guaranteed success, buffer size must be at least FSE_headerBound(). -The result of the function is the number of bytes written into 'buffer'. -If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError(); ex : buffer size too small). - -'normalizedCounter' can then be used to create the compression table 'CTable'. -The space required by 'CTable' must be already allocated, using FSE_createCTable(). -You can then use FSE_buildCTable() to fill 'CTable'. -If there is an error, both functions will return an ErrorCode (which can be tested using FSE_isError()). - -'CTable' can then be used to compress 'src', with FSE_compress_usingCTable(). -Similar to FSE_count(), the convention is that 'src' is assumed to be a table of char of size 'srcSize' -The function returns the size of compressed data (without header), necessarily <= `dstCapacity`. -If it returns '0', compressed data could not fit into 'dst'. -If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError()). -*/ - - -/* *** DECOMPRESSION *** */ - -/*! FSE_readNCount(): - Read compactly saved 'normalizedCounter' from 'rBuffer'. - @return : size read from 'rBuffer', - or an errorCode, which can be tested using FSE_isError(). - maxSymbolValuePtr[0] and tableLogPtr[0] will also be updated with their respective values */ -FSE_PUBLIC_API size_t FSE_readNCount (short* normalizedCounter, - unsigned* maxSymbolValuePtr, unsigned* tableLogPtr, - const void* rBuffer, size_t rBuffSize); - -/*! Constructor and Destructor of FSE_DTable. - Note that its size depends on 'tableLog' */ -typedef unsigned FSE_DTable; /* don't allocate that. It's just a way to be more restrictive than void* */ -FSE_PUBLIC_API FSE_DTable* FSE_createDTable(unsigned tableLog); -FSE_PUBLIC_API void FSE_freeDTable(FSE_DTable* dt); - -/*! FSE_buildDTable(): - Builds 'dt', which must be already allocated, using FSE_createDTable(). - return : 0, or an errorCode, which can be tested using FSE_isError() */ -FSE_PUBLIC_API size_t FSE_buildDTable (FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog); - -/*! FSE_decompress_usingDTable(): - Decompress compressed source `cSrc` of size `cSrcSize` using `dt` - into `dst` which must be already allocated. - @return : size of regenerated data (necessarily <= `dstCapacity`), - or an errorCode, which can be tested using FSE_isError() */ -FSE_PUBLIC_API size_t FSE_decompress_usingDTable(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, const FSE_DTable* dt); - -/*! -Tutorial : ----------- -(Note : these functions only decompress FSE-compressed blocks. - If block is uncompressed, use memcpy() instead - If block is a single repeated byte, use memset() instead ) - -The first step is to obtain the normalized frequencies of symbols. -This can be performed by FSE_readNCount() if it was saved using FSE_writeNCount(). -'normalizedCounter' must be already allocated, and have at least 'maxSymbolValuePtr[0]+1' cells of signed short. -In practice, that means it's necessary to know 'maxSymbolValue' beforehand, -or size the table to handle worst case situations (typically 256). -FSE_readNCount() will provide 'tableLog' and 'maxSymbolValue'. -The result of FSE_readNCount() is the number of bytes read from 'rBuffer'. -Note that 'rBufferSize' must be at least 4 bytes, even if useful information is less than that. -If there is an error, the function will return an error code, which can be tested using FSE_isError(). - -The next step is to build the decompression tables 'FSE_DTable' from 'normalizedCounter'. -This is performed by the function FSE_buildDTable(). -The space required by 'FSE_DTable' must be already allocated using FSE_createDTable(). -If there is an error, the function will return an error code, which can be tested using FSE_isError(). - -`FSE_DTable` can then be used to decompress `cSrc`, with FSE_decompress_usingDTable(). -`cSrcSize` must be strictly correct, otherwise decompression will fail. -FSE_decompress_usingDTable() result will tell how many bytes were regenerated (<=`dstCapacity`). -If there is an error, the function will return an error code, which can be tested using FSE_isError(). (ex: dst buffer too small) -*/ - -} - -#endif /* FSE_H */ - - - -// LICENSE_CHANGE_END - - - -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #4 -// See the end of this file for a list - -/* ****************************************************************** - * FSE : Finite State Entropy codec - * Public Prototypes declaration - * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc. - * - * You can contact the author at : - * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. -****************************************************************** */ - -#ifndef FSE_H_FSE_STATIC_LINKING_ONLY -#define FSE_H_FSE_STATIC_LINKING_ONLY - -/* *** Dependency *** */ - - -namespace duckdb_zstd { - -/* ***************************************** -* Static allocation -*******************************************/ -/* FSE buffer bounds */ -#define FSE_NCOUNTBOUND 512 -#define FSE_BLOCKBOUND(size) (size + (size>>7) + 4 /* fse states */ + sizeof(size_t) /* bitContainer */) -#define FSE_COMPRESSBOUND(size) (FSE_NCOUNTBOUND + FSE_BLOCKBOUND(size)) /* Macro version, useful for static allocation */ - -/* It is possible to statically allocate FSE CTable/DTable as a table of FSE_CTable/FSE_DTable using below macros */ -#define FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) (1 + (1<<(maxTableLog-1)) + ((maxSymbolValue+1)*2)) -#define FSE_DTABLE_SIZE_U32(maxTableLog) (1 + (1< 12) ? (1 << (maxTableLog - 2)) : 1024) ) -size_t FSE_compress_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize); - -size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits); -/**< build a fake FSE_CTable, designed for a flat distribution, where each symbol uses nbBits */ - -size_t FSE_buildCTable_rle (FSE_CTable* ct, unsigned char symbolValue); -/**< build a fake FSE_CTable, designed to compress always the same symbolValue */ - -/* FSE_buildCTable_wksp() : - * Same as FSE_buildCTable(), but using an externally allocated scratch buffer (`workSpace`). - * `wkspSize` must be >= `(1<= BIT_DStream_completed - -When it's done, verify decompression is fully completed, by checking both DStream and the relevant states. -Checking if DStream has reached its end is performed by : - BIT_endOfDStream(&DStream); -Check also the states. There might be some symbols left there, if some high probability ones (>50%) are possible. - FSE_endOfDState(&DState); -*/ - - -/* ***************************************** -* FSE unsafe API -*******************************************/ -static unsigned char FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD); -/* faster, but works only if nbBits is always >= 1 (otherwise, result will be corrupted) */ - - -/* ***************************************** -* Implementation of inlined functions -*******************************************/ -typedef struct { - int deltaFindState; - U32 deltaNbBits; -} FSE_symbolCompressionTransform; /* total 8 bytes */ - -MEM_STATIC void FSE_initCState(FSE_CState_t* statePtr, const FSE_CTable* ct) -{ - const void* ptr = ct; - const U16* u16ptr = (const U16*) ptr; - const U32 tableLog = MEM_read16(ptr); - statePtr->value = (ptrdiff_t)1<stateTable = u16ptr+2; - statePtr->symbolTT = ct + 1 + (tableLog ? (1<<(tableLog-1)) : 1); - statePtr->stateLog = tableLog; -} - - -/*! FSE_initCState2() : -* Same as FSE_initCState(), but the first symbol to include (which will be the last to be read) -* uses the smallest state value possible, saving the cost of this symbol */ -MEM_STATIC void FSE_initCState2(FSE_CState_t* statePtr, const FSE_CTable* ct, U32 symbol) -{ - FSE_initCState(statePtr, ct); - { const FSE_symbolCompressionTransform symbolTT = ((const FSE_symbolCompressionTransform*)(statePtr->symbolTT))[symbol]; - const U16* stateTable = (const U16*)(statePtr->stateTable); - U32 nbBitsOut = (U32)((symbolTT.deltaNbBits + (1<<15)) >> 16); - statePtr->value = (nbBitsOut << 16) - symbolTT.deltaNbBits; - statePtr->value = stateTable[(statePtr->value >> nbBitsOut) + symbolTT.deltaFindState]; - } -} - -MEM_STATIC void FSE_encodeSymbol(BIT_CStream_t* bitC, FSE_CState_t* statePtr, unsigned symbol) -{ - FSE_symbolCompressionTransform const symbolTT = ((const FSE_symbolCompressionTransform*)(statePtr->symbolTT))[symbol]; - const U16* const stateTable = (const U16*)(statePtr->stateTable); - U32 const nbBitsOut = (U32)((statePtr->value + symbolTT.deltaNbBits) >> 16); - BIT_addBits(bitC, statePtr->value, nbBitsOut); - statePtr->value = stateTable[ (statePtr->value >> nbBitsOut) + symbolTT.deltaFindState]; -} - -MEM_STATIC void FSE_flushCState(BIT_CStream_t* bitC, const FSE_CState_t* statePtr) -{ - BIT_addBits(bitC, statePtr->value, statePtr->stateLog); - BIT_flushBits(bitC); -} - - -/* FSE_getMaxNbBits() : - * Approximate maximum cost of a symbol, in bits. - * Fractional get rounded up (i.e : a symbol with a normalized frequency of 3 gives the same result as a frequency of 2) - * note 1 : assume symbolValue is valid (<= maxSymbolValue) - * note 2 : if freq[symbolValue]==0, @return a fake cost of tableLog+1 bits */ -MEM_STATIC U32 FSE_getMaxNbBits(const void* symbolTTPtr, U32 symbolValue) -{ - const FSE_symbolCompressionTransform* symbolTT = (const FSE_symbolCompressionTransform*) symbolTTPtr; - return (symbolTT[symbolValue].deltaNbBits + ((1<<16)-1)) >> 16; -} - -/* FSE_bitCost() : - * Approximate symbol cost, as fractional value, using fixed-point format (accuracyLog fractional bits) - * note 1 : assume symbolValue is valid (<= maxSymbolValue) - * note 2 : if freq[symbolValue]==0, @return a fake cost of tableLog+1 bits */ -MEM_STATIC U32 FSE_bitCost(const void* symbolTTPtr, U32 tableLog, U32 symbolValue, U32 accuracyLog) -{ - const FSE_symbolCompressionTransform* symbolTT = (const FSE_symbolCompressionTransform*) symbolTTPtr; - U32 const minNbBits = symbolTT[symbolValue].deltaNbBits >> 16; - U32 const threshold = (minNbBits+1) << 16; - assert(tableLog < 16); - assert(accuracyLog < 31-tableLog); /* ensure enough room for renormalization double shift */ - { U32 const tableSize = 1 << tableLog; - U32 const deltaFromThreshold = threshold - (symbolTT[symbolValue].deltaNbBits + tableSize); - U32 const normalizedDeltaFromThreshold = (deltaFromThreshold << accuracyLog) >> tableLog; /* linear interpolation (very approximate) */ - U32 const bitMultiplier = 1 << accuracyLog; - assert(symbolTT[symbolValue].deltaNbBits + tableSize <= threshold); - assert(normalizedDeltaFromThreshold <= bitMultiplier); - return (minNbBits+1)*bitMultiplier - normalizedDeltaFromThreshold; - } -} - - -/* ====== Decompression ====== */ - -typedef struct { - U16 tableLog; - U16 fastMode; -} FSE_DTableHeader; /* sizeof U32 */ - -typedef struct -{ - unsigned short newState; - unsigned char symbol; - unsigned char nbBits; -} FSE_decode_t; /* size == U32 */ - -MEM_STATIC void FSE_initDState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD, const FSE_DTable* dt) -{ - const void* ptr = dt; - const FSE_DTableHeader* const DTableH = (const FSE_DTableHeader*)ptr; - DStatePtr->state = BIT_readBits(bitD, DTableH->tableLog); - BIT_reloadDStream(bitD); - DStatePtr->table = dt + 1; -} - -MEM_STATIC BYTE FSE_peekSymbol(const FSE_DState_t* DStatePtr) -{ - FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state]; - return DInfo.symbol; -} - -MEM_STATIC void FSE_updateState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD) -{ - FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state]; - U32 const nbBits = DInfo.nbBits; - size_t const lowBits = BIT_readBits(bitD, nbBits); - DStatePtr->state = DInfo.newState + lowBits; -} - -MEM_STATIC BYTE FSE_decodeSymbol(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD) -{ - FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state]; - U32 const nbBits = DInfo.nbBits; - BYTE const symbol = DInfo.symbol; - size_t const lowBits = BIT_readBits(bitD, nbBits); - - DStatePtr->state = DInfo.newState + lowBits; - return symbol; -} - -/*! FSE_decodeSymbolFast() : - unsafe, only works if no symbol has a probability > 50% */ -MEM_STATIC BYTE FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD) -{ - FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state]; - U32 const nbBits = DInfo.nbBits; - BYTE const symbol = DInfo.symbol; - size_t const lowBits = BIT_readBitsFast(bitD, nbBits); - - DStatePtr->state = DInfo.newState + lowBits; - return symbol; -} - -MEM_STATIC unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr) -{ - return DStatePtr->state == 0; -} - - - -#ifndef FSE_COMMONDEFS_ONLY - -/* ************************************************************** -* Tuning parameters -****************************************************************/ -/*!MEMORY_USAGE : -* Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.) -* Increasing memory usage improves compression ratio -* Reduced memory usage can improve speed, due to cache effect -* Recommended max value is 14, for 16KB, which nicely fits into Intel x86 L1 cache */ -#ifndef FSE_MAX_MEMORY_USAGE -# define FSE_MAX_MEMORY_USAGE 14 -#endif -#ifndef FSE_DEFAULT_MEMORY_USAGE -# define FSE_DEFAULT_MEMORY_USAGE 13 -#endif - -/*!FSE_MAX_SYMBOL_VALUE : -* Maximum symbol value authorized. -* Required for proper stack allocation */ -#ifndef FSE_MAX_SYMBOL_VALUE -# define FSE_MAX_SYMBOL_VALUE 255 -#endif - -/* ************************************************************** -* template functions type & suffix -****************************************************************/ -#define FSE_FUNCTION_TYPE BYTE -#define FSE_FUNCTION_EXTENSION -#define FSE_DECODE_TYPE FSE_decode_t - - -#endif /* !FSE_COMMONDEFS_ONLY */ - - -/* *************************************************************** -* Constants -*****************************************************************/ -#define FSE_MAX_TABLELOG (FSE_MAX_MEMORY_USAGE-2) -#define FSE_MAX_TABLESIZE (1U< FSE_TABLELOG_ABSOLUTE_MAX -# error "FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX is not supported" -#endif - -#define FSE_TABLESTEP(tableSize) ((tableSize>>1) + (tableSize>>3) + 3) - -} - -#endif /* FSE_H_FSE_STATIC_LINKING_ONLY */ - - -// LICENSE_CHANGE_END - - - - -/* ************************************************************** -* Error Management -****************************************************************/ -// #define FSE_isError ERR_isError - - -/* ************************************************************** -* Templates -****************************************************************/ -/* - designed to be included - for type-specific functions (template emulation in C) - Objective is to write these functions only once, for improved maintenance -*/ - -/* safety checks */ -#ifndef FSE_FUNCTION_EXTENSION -# error "FSE_FUNCTION_EXTENSION must be defined" -#endif -#ifndef FSE_FUNCTION_TYPE -# error "FSE_FUNCTION_TYPE must be defined" -#endif - -/* Function names */ -#define FSE_CAT(X,Y) X##Y -#define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y) -#define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y) - -namespace duckdb_zstd { - -/* Function templates */ - -/* FSE_buildCTable_wksp() : - * Same as FSE_buildCTable(), but using an externally allocated scratch buffer (`workSpace`). - * wkspSize should be sized to handle worst case situation, which is `1<>1 : 1) ; - FSE_symbolCompressionTransform* const symbolTT = (FSE_symbolCompressionTransform*) (FSCT); - U32 const step = FSE_TABLESTEP(tableSize); - U32 cumul[FSE_MAX_SYMBOL_VALUE+2]; - - FSE_FUNCTION_TYPE* const tableSymbol = (FSE_FUNCTION_TYPE*)workSpace; - U32 highThreshold = tableSize-1; - - /* CTable header */ - if (((size_t)1 << tableLog) * sizeof(FSE_FUNCTION_TYPE) > wkspSize) return ERROR(tableLog_tooLarge); - tableU16[-2] = (U16) tableLog; - tableU16[-1] = (U16) maxSymbolValue; - assert(tableLog < 16); /* required for threshold strategy to work */ - - /* For explanations on how to distribute symbol values over the table : - * http://fastcompression.blogspot.fr/2014/02/fse-distributing-symbol-values.html */ - - #ifdef __clang_analyzer__ - memset(tableSymbol, 0, sizeof(*tableSymbol) * tableSize); /* useless initialization, just to keep scan-build happy */ - #endif - - /* symbol start positions */ - { U32 u; - cumul[0] = 0; - for (u=1; u <= maxSymbolValue+1; u++) { - if (normalizedCounter[u-1]==-1) { /* Low proba symbol */ - cumul[u] = cumul[u-1] + 1; - tableSymbol[highThreshold--] = (FSE_FUNCTION_TYPE)(u-1); - } else { - cumul[u] = cumul[u-1] + normalizedCounter[u-1]; - } } - cumul[maxSymbolValue+1] = tableSize+1; - } - - /* Spread symbols */ - { U32 position = 0; - U32 symbol; - for (symbol=0; symbol<=maxSymbolValue; symbol++) { - int nbOccurrences; - int const freq = normalizedCounter[symbol]; - for (nbOccurrences=0; nbOccurrences highThreshold) - position = (position + step) & tableMask; /* Low proba area */ - } } - - assert(position==0); /* Must have initialized all positions */ - } - - /* Build table */ - { U32 u; for (u=0; u> 3) + 3; - return maxSymbolValue ? maxHeaderSize : FSE_NCOUNTBOUND; /* maxSymbolValue==0 ? use default */ -} - -static size_t -FSE_writeNCount_generic (void* header, size_t headerBufferSize, - const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, - unsigned writeIsSafe) -{ - BYTE* const ostart = (BYTE*) header; - BYTE* out = ostart; - BYTE* const oend = ostart + headerBufferSize; - int nbBits; - const int tableSize = 1 << tableLog; - int remaining; - int threshold; - U32 bitStream = 0; - int bitCount = 0; - unsigned symbol = 0; - unsigned const alphabetSize = maxSymbolValue + 1; - int previousIs0 = 0; - - /* Table Size */ - bitStream += (tableLog-FSE_MIN_TABLELOG) << bitCount; - bitCount += 4; - - /* Init */ - remaining = tableSize+1; /* +1 for extra accuracy */ - threshold = tableSize; - nbBits = tableLog+1; - - while ((symbol < alphabetSize) && (remaining>1)) { /* stops at 1 */ - if (previousIs0) { - unsigned start = symbol; - while ((symbol < alphabetSize) && !normalizedCounter[symbol]) symbol++; - if (symbol == alphabetSize) break; /* incorrect distribution */ - while (symbol >= start+24) { - start+=24; - bitStream += 0xFFFFU << bitCount; - if ((!writeIsSafe) && (out > oend-2)) - return ERROR(dstSize_tooSmall); /* Buffer overflow */ - out[0] = (BYTE) bitStream; - out[1] = (BYTE)(bitStream>>8); - out+=2; - bitStream>>=16; - } - while (symbol >= start+3) { - start+=3; - bitStream += 3 << bitCount; - bitCount += 2; - } - bitStream += (symbol-start) << bitCount; - bitCount += 2; - if (bitCount>16) { - if ((!writeIsSafe) && (out > oend - 2)) - return ERROR(dstSize_tooSmall); /* Buffer overflow */ - out[0] = (BYTE)bitStream; - out[1] = (BYTE)(bitStream>>8); - out += 2; - bitStream >>= 16; - bitCount -= 16; - } } - { int count = normalizedCounter[symbol++]; - int const max = (2*threshold-1) - remaining; - remaining -= count < 0 ? -count : count; - count++; /* +1 for extra accuracy */ - if (count>=threshold) - count += max; /* [0..max[ [max..threshold[ (...) [threshold+max 2*threshold[ */ - bitStream += count << bitCount; - bitCount += nbBits; - bitCount -= (count>=1; } - } - if (bitCount>16) { - if ((!writeIsSafe) && (out > oend - 2)) - return ERROR(dstSize_tooSmall); /* Buffer overflow */ - out[0] = (BYTE)bitStream; - out[1] = (BYTE)(bitStream>>8); - out += 2; - bitStream >>= 16; - bitCount -= 16; - } } - - if (remaining != 1) - return ERROR(GENERIC); /* incorrect normalized distribution */ - assert(symbol <= alphabetSize); - - /* flush remaining bitStream */ - if ((!writeIsSafe) && (out > oend - 2)) - return ERROR(dstSize_tooSmall); /* Buffer overflow */ - out[0] = (BYTE)bitStream; - out[1] = (BYTE)(bitStream>>8); - out+= (bitCount+7) /8; - - return (out-ostart); -} - - -size_t FSE_writeNCount (void* buffer, size_t bufferSize, - const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog) -{ - if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge); /* Unsupported */ - if (tableLog < FSE_MIN_TABLELOG) return ERROR(GENERIC); /* Unsupported */ - - if (bufferSize < FSE_NCountWriteBound(maxSymbolValue, tableLog)) - return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 0); - - return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 1 /* write in buffer is safe */); -} - - -/*-************************************************************** -* FSE Compression Code -****************************************************************/ - -FSE_CTable* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog) -{ - size_t size; - if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX; - size = FSE_CTABLE_SIZE_U32 (tableLog, maxSymbolValue) * sizeof(U32); - return (FSE_CTable*)malloc(size); -} - -void FSE_freeCTable (FSE_CTable* ct) { free(ct); } - -/* provides the minimum logSize to safely represent a distribution */ -static unsigned FSE_minTableLog(size_t srcSize, unsigned maxSymbolValue) -{ - U32 minBitsSrc = BIT_highbit32((U32)(srcSize)) + 1; - U32 minBitsSymbols = BIT_highbit32(maxSymbolValue) + 2; - U32 minBits = minBitsSrc < minBitsSymbols ? minBitsSrc : minBitsSymbols; - assert(srcSize > 1); /* Not supported, RLE should be used instead */ - return minBits; -} - -unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus) -{ - U32 maxBitsSrc = BIT_highbit32((U32)(srcSize - 1)) - minus; - U32 tableLog = maxTableLog; - U32 minBits = FSE_minTableLog(srcSize, maxSymbolValue); - assert(srcSize > 1); /* Not supported, RLE should be used instead */ - if (tableLog==0) tableLog = FSE_DEFAULT_TABLELOG; - if (maxBitsSrc < tableLog) tableLog = maxBitsSrc; /* Accuracy can be reduced */ - if (minBits > tableLog) tableLog = minBits; /* Need a minimum to safely represent all symbol values */ - if (tableLog < FSE_MIN_TABLELOG) tableLog = FSE_MIN_TABLELOG; - if (tableLog > FSE_MAX_TABLELOG) tableLog = FSE_MAX_TABLELOG; - return tableLog; -} - -unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue) -{ - return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 2); -} - - -/* Secondary normalization method. - To be used when primary method fails. */ - -static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count, size_t total, U32 maxSymbolValue) -{ - short const NOT_YET_ASSIGNED = -2; - U32 s; - U32 distributed = 0; - U32 ToDistribute; - - /* Init */ - U32 const lowThreshold = (U32)(total >> tableLog); - U32 lowOne = (U32)((total * 3) >> (tableLog + 1)); - - for (s=0; s<=maxSymbolValue; s++) { - if (count[s] == 0) { - norm[s]=0; - continue; - } - if (count[s] <= lowThreshold) { - norm[s] = -1; - distributed++; - total -= count[s]; - continue; - } - if (count[s] <= lowOne) { - norm[s] = 1; - distributed++; - total -= count[s]; - continue; - } - - norm[s]=NOT_YET_ASSIGNED; - } - ToDistribute = (1 << tableLog) - distributed; - - if (ToDistribute == 0) - return 0; - - if ((total / ToDistribute) > lowOne) { - /* risk of rounding to zero */ - lowOne = (U32)((total * 3) / (ToDistribute * 2)); - for (s=0; s<=maxSymbolValue; s++) { - if ((norm[s] == NOT_YET_ASSIGNED) && (count[s] <= lowOne)) { - norm[s] = 1; - distributed++; - total -= count[s]; - continue; - } } - ToDistribute = (1 << tableLog) - distributed; - } - - if (distributed == maxSymbolValue+1) { - /* all values are pretty poor; - probably incompressible data (should have already been detected); - find max, then give all remaining points to max */ - U32 maxV = 0, maxC = 0; - for (s=0; s<=maxSymbolValue; s++) - if (count[s] > maxC) { maxV=s; maxC=count[s]; } - norm[maxV] += (short)ToDistribute; - return 0; - } - - if (total == 0) { - /* all of the symbols were low enough for the lowOne or lowThreshold */ - for (s=0; ToDistribute > 0; s = (s+1)%(maxSymbolValue+1)) - if (norm[s] > 0) { ToDistribute--; norm[s]++; } - return 0; - } - - { U64 const vStepLog = 62 - tableLog; - U64 const mid = (1ULL << (vStepLog-1)) - 1; - U64 const rStep = ((((U64)1<> vStepLog); - U32 const sEnd = (U32)(end >> vStepLog); - U32 const weight = sEnd - sStart; - if (weight < 1) - return ERROR(GENERIC); - norm[s] = (short)weight; - tmpTotal = end; - } } } - - return 0; -} - - -size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog, - const unsigned* count, size_t total, - unsigned maxSymbolValue) -{ - /* Sanity checks */ - if (tableLog==0) tableLog = FSE_DEFAULT_TABLELOG; - if (tableLog < FSE_MIN_TABLELOG) return ERROR(GENERIC); /* Unsupported size */ - if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge); /* Unsupported size */ - if (tableLog < FSE_minTableLog(total, maxSymbolValue)) return ERROR(GENERIC); /* Too small tableLog, compression potentially impossible */ - - { static U32 const rtbTable[] = { 0, 473195, 504333, 520860, 550000, 700000, 750000, 830000 }; - U64 const scale = 62 - tableLog; - U64 const step = ((U64)1<<62) / total; /* <== here, one division ! */ - U64 const vStep = 1ULL<<(scale-20); - int stillToDistribute = 1<> tableLog); - - for (s=0; s<=maxSymbolValue; s++) { - if (count[s] == total) return 0; /* rle special case */ - if (count[s] == 0) { normalizedCounter[s]=0; continue; } - if (count[s] <= lowThreshold) { - normalizedCounter[s] = -1; - stillToDistribute--; - } else { - short proba = (short)((count[s]*step) >> scale); - if (proba<8) { - U64 restToBeat = vStep * rtbTable[proba]; - proba += (count[s]*step) - ((U64)proba< restToBeat; - } - if (proba > largestP) { largestP=proba; largest=s; } - normalizedCounter[s] = proba; - stillToDistribute -= proba; - } } - if (-stillToDistribute >= (normalizedCounter[largest] >> 1)) { - /* corner case, need another normalization method */ - size_t const errorCode = FSE_normalizeM2(normalizedCounter, tableLog, count, total, maxSymbolValue); - if (FSE_isError(errorCode)) return errorCode; - } - else normalizedCounter[largest] += (short)stillToDistribute; - } - -#if 0 - { /* Print Table (debug) */ - U32 s; - U32 nTotal = 0; - for (s=0; s<=maxSymbolValue; s++) - RAWLOG(2, "%3i: %4i \n", s, normalizedCounter[s]); - for (s=0; s<=maxSymbolValue; s++) - nTotal += abs(normalizedCounter[s]); - if (nTotal != (1U<>1); /* assumption : tableLog >= 1 */ - FSE_symbolCompressionTransform* const symbolTT = (FSE_symbolCompressionTransform*) (FSCT); - unsigned s; - - /* Sanity checks */ - if (nbBits < 1) return ERROR(GENERIC); /* min size */ - - /* header */ - tableU16[-2] = (U16) nbBits; - tableU16[-1] = (U16) maxSymbolValue; - - /* Build table */ - for (s=0; s FSE_MAX_TABLELOG*4+7 ) && (srcSize & 2)) { /* test bit 2 */ - FSE_encodeSymbol(&bitC, &CState2, *--ip); - FSE_encodeSymbol(&bitC, &CState1, *--ip); - FSE_FLUSHBITS(&bitC); - } - - /* 2 or 4 encoding per loop */ - while ( ip>istart ) { - - FSE_encodeSymbol(&bitC, &CState2, *--ip); - - if (sizeof(bitC.bitContainer)*8 < FSE_MAX_TABLELOG*2+7 ) /* this test must be static */ - FSE_FLUSHBITS(&bitC); - - FSE_encodeSymbol(&bitC, &CState1, *--ip); - - if (sizeof(bitC.bitContainer)*8 > FSE_MAX_TABLELOG*4+7 ) { /* this test must be static */ - FSE_encodeSymbol(&bitC, &CState2, *--ip); - FSE_encodeSymbol(&bitC, &CState1, *--ip); - } - - FSE_FLUSHBITS(&bitC); - } - - FSE_flushCState(&bitC, &CState2); - FSE_flushCState(&bitC, &CState1); - return BIT_closeCStream(&bitC); -} - -size_t FSE_compress_usingCTable (void* dst, size_t dstSize, - const void* src, size_t srcSize, - const FSE_CTable* ct) -{ - unsigned const fast = (dstSize >= FSE_BLOCKBOUND(srcSize)); - - if (fast) - return FSE_compress_usingCTable_generic(dst, dstSize, src, srcSize, ct, 1); - else - return FSE_compress_usingCTable_generic(dst, dstSize, src, srcSize, ct, 0); -} - - -size_t FSE_compressBound(size_t size) { return FSE_COMPRESSBOUND(size); } - -/* FSE_compress_wksp() : - * Same as FSE_compress2(), but using an externally allocated scratch buffer (`workSpace`). - * `wkspSize` size must be `(1< not compressible */ - if (maxCount < (srcSize >> 7)) return 0; /* Heuristic : not compressible enough */ - } - - tableLog = FSE_optimalTableLog(tableLog, srcSize, maxSymbolValue); - CHECK_F( FSE_normalizeCount(norm, tableLog, count, srcSize, maxSymbolValue) ); - - /* Write table description header */ - { CHECK_V_F(nc_err, FSE_writeNCount(op, oend-op, norm, maxSymbolValue, tableLog) ); - op += nc_err; - } - - /* Compress */ - CHECK_F( FSE_buildCTable_wksp(CTable, norm, maxSymbolValue, tableLog, scratchBuffer, scratchBufferSize) ); - { CHECK_V_F(cSize, FSE_compress_usingCTable(op, oend - op, src, srcSize, CTable) ); - if (cSize == 0) return 0; /* not enough space for compressed data */ - op += cSize; - } - - /* check compressibility */ - if ( (size_t)(op-ostart) >= srcSize-1 ) return 0; - - return op-ostart; -} - -typedef struct { - FSE_CTable CTable_max[FSE_CTABLE_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)]; - BYTE scratchBuffer[1 << FSE_MAX_TABLELOG]; -} fseWkspMax_t; - -size_t FSE_compress2 (void* dst, size_t dstCapacity, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog) -{ - fseWkspMax_t scratchBuffer; - DEBUG_STATIC_ASSERT(sizeof(scratchBuffer) >= FSE_WKSP_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)); /* compilation failures here means scratchBuffer is not large enough */ - if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge); - return FSE_compress_wksp(dst, dstCapacity, src, srcSize, maxSymbolValue, tableLog, &scratchBuffer, sizeof(scratchBuffer)); -} - -size_t FSE_compress (void* dst, size_t dstCapacity, const void* src, size_t srcSize) -{ - return FSE_compress2(dst, dstCapacity, src, srcSize, FSE_MAX_SYMBOL_VALUE, FSE_DEFAULT_TABLELOG); -} - -} - -#endif /* FSE_COMMONDEFS_ONLY */ - - -// LICENSE_CHANGE_END - - -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #4 -// See the end of this file for a list - -/* ****************************************************************** - * hist : Histogram functions - * part of Finite State Entropy project - * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc. - * - * You can contact the author at : - * - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy - * - Public forum : https://groups.google.com/forum/#!forum/lz4c - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. -****************************************************************** */ - -/* --- dependencies --- */ - /* U32, BYTE, etc. */ - /* assert, DEBUGLOG */ - /* ERROR */ - - - -namespace duckdb_zstd { - -/* --- Error management --- */ -unsigned HIST_isError(size_t code) { return ERR_isError(code); } - -/*-************************************************************** - * Histogram functions - ****************************************************************/ -unsigned HIST_count_simple(unsigned* count, unsigned* maxSymbolValuePtr, - const void* src, size_t srcSize) -{ - const BYTE* ip = (const BYTE*)src; - const BYTE* const end = ip + srcSize; - unsigned maxSymbolValue = *maxSymbolValuePtr; - unsigned largestCount=0; - - memset(count, 0, (maxSymbolValue+1) * sizeof(*count)); - if (srcSize==0) { *maxSymbolValuePtr = 0; return 0; } - - while (ip largestCount) largestCount = count[s]; - } - - return largestCount; -} - -typedef enum { trustInput, checkMaxSymbolValue } HIST_checkInput_e; - -/* HIST_count_parallel_wksp() : - * store histogram into 4 intermediate tables, recombined at the end. - * this design makes better use of OoO cpus, - * and is noticeably faster when some values are heavily repeated. - * But it needs some additional workspace for intermediate tables. - * `workSpace` size must be a table of size >= HIST_WKSP_SIZE_U32. - * @return : largest histogram frequency, - * or an error code (notably when histogram would be larger than *maxSymbolValuePtr). */ -static size_t HIST_count_parallel_wksp( - unsigned* count, unsigned* maxSymbolValuePtr, - const void* source, size_t sourceSize, - HIST_checkInput_e check, - U32* const workSpace) -{ - const BYTE* ip = (const BYTE*)source; - const BYTE* const iend = ip+sourceSize; - unsigned maxSymbolValue = *maxSymbolValuePtr; - unsigned max=0; - U32* const Counting1 = workSpace; - U32* const Counting2 = Counting1 + 256; - U32* const Counting3 = Counting2 + 256; - U32* const Counting4 = Counting3 + 256; - - memset(workSpace, 0, 4*256*sizeof(unsigned)); - - /* safety checks */ - if (!sourceSize) { - memset(count, 0, maxSymbolValue + 1); - *maxSymbolValuePtr = 0; - return 0; - } - if (!maxSymbolValue) maxSymbolValue = 255; /* 0 == default */ - - /* by stripes of 16 bytes */ - { U32 cached = MEM_read32(ip); ip += 4; - while (ip < iend-15) { - U32 c = cached; cached = MEM_read32(ip); ip += 4; - Counting1[(BYTE) c ]++; - Counting2[(BYTE)(c>>8) ]++; - Counting3[(BYTE)(c>>16)]++; - Counting4[ c>>24 ]++; - c = cached; cached = MEM_read32(ip); ip += 4; - Counting1[(BYTE) c ]++; - Counting2[(BYTE)(c>>8) ]++; - Counting3[(BYTE)(c>>16)]++; - Counting4[ c>>24 ]++; - c = cached; cached = MEM_read32(ip); ip += 4; - Counting1[(BYTE) c ]++; - Counting2[(BYTE)(c>>8) ]++; - Counting3[(BYTE)(c>>16)]++; - Counting4[ c>>24 ]++; - c = cached; cached = MEM_read32(ip); ip += 4; - Counting1[(BYTE) c ]++; - Counting2[(BYTE)(c>>8) ]++; - Counting3[(BYTE)(c>>16)]++; - Counting4[ c>>24 ]++; - } - ip-=4; - } - - /* finish last symbols */ - while (ipmaxSymbolValue; s--) { - Counting1[s] += Counting2[s] + Counting3[s] + Counting4[s]; - if (Counting1[s]) return ERROR(maxSymbolValue_tooSmall); - } } - - { U32 s; - if (maxSymbolValue > 255) maxSymbolValue = 255; - for (s=0; s<=maxSymbolValue; s++) { - count[s] = Counting1[s] + Counting2[s] + Counting3[s] + Counting4[s]; - if (count[s] > max) max = count[s]; - } } - - while (!count[maxSymbolValue]) maxSymbolValue--; - *maxSymbolValuePtr = maxSymbolValue; - return (size_t)max; -} - -/* HIST_countFast_wksp() : - * Same as HIST_countFast(), but using an externally provided scratch buffer. - * `workSpace` is a writable buffer which must be 4-bytes aligned, - * `workSpaceSize` must be >= HIST_WKSP_SIZE - */ -size_t HIST_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr, - const void* source, size_t sourceSize, - void* workSpace, size_t workSpaceSize) -{ - if (sourceSize < 1500) /* heuristic threshold */ - return HIST_count_simple(count, maxSymbolValuePtr, source, sourceSize); - if ((size_t)workSpace & 3) return ERROR(GENERIC); /* must be aligned on 4-bytes boundaries */ - if (workSpaceSize < HIST_WKSP_SIZE) return ERROR(workSpace_tooSmall); - return HIST_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, trustInput, (U32*)workSpace); -} - -/* fast variant (unsafe : won't check if src contains values beyond count[] limit) */ -size_t HIST_countFast(unsigned* count, unsigned* maxSymbolValuePtr, - const void* source, size_t sourceSize) -{ - unsigned tmpCounters[HIST_WKSP_SIZE_U32]; - return HIST_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, tmpCounters, sizeof(tmpCounters)); -} - -/* HIST_count_wksp() : - * Same as HIST_count(), but using an externally provided scratch buffer. - * `workSpace` size must be table of >= HIST_WKSP_SIZE_U32 unsigned */ -size_t HIST_count_wksp(unsigned* count, unsigned* maxSymbolValuePtr, - const void* source, size_t sourceSize, - void* workSpace, size_t workSpaceSize) -{ - if ((size_t)workSpace & 3) return ERROR(GENERIC); /* must be aligned on 4-bytes boundaries */ - if (workSpaceSize < HIST_WKSP_SIZE) return ERROR(workSpace_tooSmall); - if (*maxSymbolValuePtr < 255) - return HIST_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, checkMaxSymbolValue, (U32*)workSpace); - *maxSymbolValuePtr = 255; - return HIST_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, workSpace, workSpaceSize); -} - -size_t HIST_count(unsigned* count, unsigned* maxSymbolValuePtr, - const void* src, size_t srcSize) -{ - unsigned tmpCounters[HIST_WKSP_SIZE_U32]; - return HIST_count_wksp(count, maxSymbolValuePtr, src, srcSize, tmpCounters, sizeof(tmpCounters)); -} - -} - - -// LICENSE_CHANGE_END - - -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #4 -// See the end of this file for a list - -/* ****************************************************************** - * Huffman encoder, part of New Generation Entropy library - * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc. - * - * You can contact the author at : - * - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy - * - Public forum : https://groups.google.com/forum/#!forum/lz4c - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. -****************************************************************** */ - -/* ************************************************************** -* Compiler specifics -****************************************************************/ -#ifdef _MSC_VER /* Visual Studio */ -# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ -#endif - - -/* ************************************************************** -* Includes -****************************************************************/ -#include /* memcpy, memset */ -#include /* printf (debug) */ - - - - /* header compression */ - - - -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #4 -// See the end of this file for a list - -/* ****************************************************************** - * huff0 huffman codec, - * part of Finite State Entropy library - * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc. - * - * You can contact the author at : - * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. -****************************************************************** */ - -#ifndef HUF_H_298734234 -#define HUF_H_298734234 - -/* *** Dependencies *** */ -#include /* size_t */ - - -/* *** library symbols visibility *** */ -/* Note : when linking with -fvisibility=hidden on gcc, or by default on Visual, - * HUF symbols remain "private" (internal symbols for library only). - * Set macro FSE_DLL_EXPORT to 1 if you want HUF symbols visible on DLL interface */ -#if defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1) && defined(__GNUC__) && (__GNUC__ >= 4) -# define HUF_PUBLIC_API __attribute__ ((visibility ("default"))) -#elif defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1) /* Visual expected */ -# define HUF_PUBLIC_API __declspec(dllexport) -#elif defined(FSE_DLL_IMPORT) && (FSE_DLL_IMPORT==1) -# define HUF_PUBLIC_API __declspec(dllimport) /* not required, just to generate faster code (saves a function pointer load from IAT and an indirect jump) */ -#else -# define HUF_PUBLIC_API -#endif - -namespace duckdb_zstd { - -/* ========================== */ -/* *** simple functions *** */ -/* ========================== */ - -/** HUF_compress() : - * Compress content from buffer 'src', of size 'srcSize', into buffer 'dst'. - * 'dst' buffer must be already allocated. - * Compression runs faster if `dstCapacity` >= HUF_compressBound(srcSize). - * `srcSize` must be <= `HUF_BLOCKSIZE_MAX` == 128 KB. - * @return : size of compressed data (<= `dstCapacity`). - * Special values : if return == 0, srcData is not compressible => Nothing is stored within dst !!! - * if HUF_isError(return), compression failed (more details using HUF_getErrorName()) - */ -HUF_PUBLIC_API size_t HUF_compress(void* dst, size_t dstCapacity, - const void* src, size_t srcSize); - -/** HUF_decompress() : - * Decompress HUF data from buffer 'cSrc', of size 'cSrcSize', - * into already allocated buffer 'dst', of minimum size 'dstSize'. - * `originalSize` : **must** be the ***exact*** size of original (uncompressed) data. - * Note : in contrast with FSE, HUF_decompress can regenerate - * RLE (cSrcSize==1) and uncompressed (cSrcSize==dstSize) data, - * because it knows size to regenerate (originalSize). - * @return : size of regenerated data (== originalSize), - * or an error code, which can be tested using HUF_isError() - */ -HUF_PUBLIC_API size_t HUF_decompress(void* dst, size_t originalSize, - const void* cSrc, size_t cSrcSize); - - -/* *** Tool functions *** */ -#define HUF_BLOCKSIZE_MAX (128 * 1024) /**< maximum input size for a single block compressed with HUF_compress */ -HUF_PUBLIC_API size_t HUF_compressBound(size_t size); /**< maximum compressed size (worst case) */ - -/* Error Management */ -HUF_PUBLIC_API unsigned HUF_isError(size_t code); /**< tells if a return value is an error code */ -HUF_PUBLIC_API const char* HUF_getErrorName(size_t code); /**< provides error code string (useful for debugging) */ - - -/* *** Advanced function *** */ - -/** HUF_compress2() : - * Same as HUF_compress(), but offers control over `maxSymbolValue` and `tableLog`. - * `maxSymbolValue` must be <= HUF_SYMBOLVALUE_MAX . - * `tableLog` must be `<= HUF_TABLELOG_MAX` . */ -HUF_PUBLIC_API size_t HUF_compress2 (void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - unsigned maxSymbolValue, unsigned tableLog); - -/** HUF_compress4X_wksp() : - * Same as HUF_compress2(), but uses externally allocated `workSpace`. - * `workspace` must have minimum alignment of 4, and be at least as large as HUF_WORKSPACE_SIZE */ -#define HUF_WORKSPACE_SIZE ((6 << 10) + 256) -#define HUF_WORKSPACE_SIZE_U32 (HUF_WORKSPACE_SIZE / sizeof(U32)) -HUF_PUBLIC_API size_t HUF_compress4X_wksp (void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - unsigned maxSymbolValue, unsigned tableLog, - void* workSpace, size_t wkspSize); - -#endif /* HUF_H_298734234 */ - - -// LICENSE_CHANGE_END - - - -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #4 -// See the end of this file for a list - -/* ****************************************************************** - * WARNING !! - * The following section contains advanced and experimental definitions - * which shall never be used in the context of a dynamic library, - * because they are not guaranteed to remain stable in the future. - * Only consider them in association with static linking. - * **************************************************************** */ -#ifndef HUF_H_HUF_STATIC_LINKING_ONLY -#define HUF_H_HUF_STATIC_LINKING_ONLY - -/* *** Dependencies *** */ - /* U32 */ - - -/* *** Constants *** */ -#define HUF_TABLELOG_MAX 12 /* max runtime value of tableLog (due to static allocation); can be modified up to HUF_ABSOLUTEMAX_TABLELOG */ -#define HUF_TABLELOG_DEFAULT 11 /* default tableLog value when none specified */ -#define HUF_SYMBOLVALUE_MAX 255 - -#define HUF_TABLELOG_ABSOLUTEMAX 15 /* absolute limit of HUF_MAX_TABLELOG. Beyond that value, code does not work */ -#if (HUF_TABLELOG_MAX > HUF_TABLELOG_ABSOLUTEMAX) -# error "HUF_TABLELOG_MAX is too large !" -#endif - - -/* **************************************** -* Static allocation -******************************************/ -/* HUF buffer bounds */ -#define HUF_CTABLEBOUND 129 -#define HUF_BLOCKBOUND(size) (size + (size>>8) + 8) /* only true when incompressible is pre-filtered with fast heuristic */ -#define HUF_COMPRESSBOUND(size) (HUF_CTABLEBOUND + HUF_BLOCKBOUND(size)) /* Macro version, useful for static allocation */ - -/* static allocation of HUF's Compression Table */ -#define HUF_CTABLE_SIZE_U32(maxSymbolValue) ((maxSymbolValue)+1) /* Use tables of U32, for proper alignment */ -#define HUF_CTABLE_SIZE(maxSymbolValue) (HUF_CTABLE_SIZE_U32(maxSymbolValue) * sizeof(U32)) -#define HUF_CREATE_STATIC_CTABLE(name, maxSymbolValue) \ - U32 name##hb[HUF_CTABLE_SIZE_U32(maxSymbolValue)]; \ - void* name##hv = &(name##hb); \ - HUF_CElt* name = (HUF_CElt*)(name##hv) /* no final ; */ - -/* static allocation of HUF's DTable */ -typedef U32 HUF_DTable; -#define HUF_DTABLE_SIZE(maxTableLog) (1 + (1<<(maxTableLog))) -#define HUF_CREATE_STATIC_DTABLEX1(DTable, maxTableLog) \ - HUF_DTable DTable[HUF_DTABLE_SIZE((maxTableLog)-1)] = { ((U32)((maxTableLog)-1) * 0x01000001) } -#define HUF_CREATE_STATIC_DTABLEX2(DTable, maxTableLog) \ - HUF_DTable DTable[HUF_DTABLE_SIZE(maxTableLog)] = { ((U32)(maxTableLog) * 0x01000001) } - - -/* **************************************** -* Advanced decompression functions -******************************************/ -size_t HUF_decompress4X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */ -#ifndef HUF_FORCE_DECOMPRESS_X1 -size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */ -#endif - -size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< decodes RLE and uncompressed */ -size_t HUF_decompress4X_hufOnly(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< considers RLE and uncompressed as errors */ -size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< considers RLE and uncompressed as errors */ -size_t HUF_decompress4X1_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */ -size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< single-symbol decoder */ -#ifndef HUF_FORCE_DECOMPRESS_X1 -size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */ -size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< double-symbols decoder */ -#endif - - -/* **************************************** - * HUF detailed API - * ****************************************/ - -/*! HUF_compress() does the following: - * 1. count symbol occurrence from source[] into table count[] using FSE_count() (exposed within "zstd/common/fse.h") - * 2. (optional) refine tableLog using HUF_optimalTableLog() - * 3. build Huffman table from count using HUF_buildCTable() - * 4. save Huffman table to memory buffer using HUF_writeCTable() - * 5. encode the data stream using HUF_compress4X_usingCTable() - * - * The following API allows targeting specific sub-functions for advanced tasks. - * For example, it's possible to compress several blocks using the same 'CTable', - * or to save and regenerate 'CTable' using external methods. - */ -unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue); -typedef struct HUF_CElt_s HUF_CElt; /* incomplete type */ -size_t HUF_buildCTable (HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue, unsigned maxNbBits); /* @return : maxNbBits; CTable and count can overlap. In which case, CTable will overwrite count content */ -size_t HUF_writeCTable (void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog); -size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable); -size_t HUF_estimateCompressedSize(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue); -int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue); - -typedef enum { - HUF_repeat_none, /**< Cannot use the previous table */ - HUF_repeat_check, /**< Can use the previous table but it must be checked. Note : The previous table must have been constructed by HUF_compress{1, 4}X_repeat */ - HUF_repeat_valid /**< Can use the previous table and it is assumed to be valid */ - } HUF_repeat; -/** HUF_compress4X_repeat() : - * Same as HUF_compress4X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none. - * If it uses hufTable it does not modify hufTable or repeat. - * If it doesn't, it sets *repeat = HUF_repeat_none, and it sets hufTable to the table used. - * If preferRepeat then the old table will always be used if valid. */ -size_t HUF_compress4X_repeat(void* dst, size_t dstSize, - const void* src, size_t srcSize, - unsigned maxSymbolValue, unsigned tableLog, - void* workSpace, size_t wkspSize, /**< `workSpace` must be aligned on 4-bytes boundaries, `wkspSize` must be >= HUF_WORKSPACE_SIZE */ - HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2); - -/** HUF_buildCTable_wksp() : - * Same as HUF_buildCTable(), but using externally allocated scratch buffer. - * `workSpace` must be aligned on 4-bytes boundaries, and its size must be >= HUF_CTABLE_WORKSPACE_SIZE. - */ -#define HUF_CTABLE_WORKSPACE_SIZE_U32 (2*HUF_SYMBOLVALUE_MAX +1 +1) -#define HUF_CTABLE_WORKSPACE_SIZE (HUF_CTABLE_WORKSPACE_SIZE_U32 * sizeof(unsigned)) -size_t HUF_buildCTable_wksp (HUF_CElt* tree, - const unsigned* count, U32 maxSymbolValue, U32 maxNbBits, - void* workSpace, size_t wkspSize); - -/*! HUF_readStats() : - * Read compact Huffman tree, saved by HUF_writeCTable(). - * `huffWeight` is destination buffer. - * @return : size read from `src` , or an error Code . - * Note : Needed by HUF_readCTable() and HUF_readDTableXn() . */ -size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, - U32* rankStats, U32* nbSymbolsPtr, U32* tableLogPtr, - const void* src, size_t srcSize); - -/** HUF_readCTable() : - * Loading a CTable saved with HUF_writeCTable() */ -size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize, unsigned *hasZeroWeights); - -/** HUF_getNbBits() : - * Read nbBits from CTable symbolTable, for symbol `symbolValue` presumed <= HUF_SYMBOLVALUE_MAX - * Note 1 : is not inlined, as HUF_CElt definition is private - * Note 2 : const void* used, so that it can provide a statically allocated table as argument (which uses type U32) */ -U32 HUF_getNbBits(const void* symbolTable, U32 symbolValue); - -/* - * HUF_decompress() does the following: - * 1. select the decompression algorithm (X1, X2) based on pre-computed heuristics - * 2. build Huffman table from save, using HUF_readDTableX?() - * 3. decode 1 or 4 segments in parallel using HUF_decompress?X?_usingDTable() - */ - -/** HUF_selectDecoder() : - * Tells which decoder is likely to decode faster, - * based on a set of pre-computed metrics. - * @return : 0==HUF_decompress4X1, 1==HUF_decompress4X2 . - * Assumption : 0 < dstSize <= 128 KB */ -U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize); - -/** - * The minimum workspace size for the `workSpace` used in - * HUF_readDTableX1_wksp() and HUF_readDTableX2_wksp(). - * - * The space used depends on HUF_TABLELOG_MAX, ranging from ~1500 bytes when - * HUF_TABLE_LOG_MAX=12 to ~1850 bytes when HUF_TABLE_LOG_MAX=15. - * Buffer overflow errors may potentially occur if code modifications result in - * a required workspace size greater than that specified in the following - * macro. - */ -#define HUF_DECOMPRESS_WORKSPACE_SIZE (2 << 10) -#define HUF_DECOMPRESS_WORKSPACE_SIZE_U32 (HUF_DECOMPRESS_WORKSPACE_SIZE / sizeof(U32)) - -#ifndef HUF_FORCE_DECOMPRESS_X2 -size_t HUF_readDTableX1 (HUF_DTable* DTable, const void* src, size_t srcSize); -size_t HUF_readDTableX1_wksp (HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize); -#endif -#ifndef HUF_FORCE_DECOMPRESS_X1 -size_t HUF_readDTableX2 (HUF_DTable* DTable, const void* src, size_t srcSize); -size_t HUF_readDTableX2_wksp (HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize); -#endif - -size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); -#ifndef HUF_FORCE_DECOMPRESS_X2 -size_t HUF_decompress4X1_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); -#endif -#ifndef HUF_FORCE_DECOMPRESS_X1 -size_t HUF_decompress4X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); -#endif - - -/* ====================== */ -/* single stream variants */ -/* ====================== */ - -size_t HUF_compress1X (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog); -size_t HUF_compress1X_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize); /**< `workSpace` must be a table of at least HUF_WORKSPACE_SIZE_U32 unsigned */ -size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable); -/** HUF_compress1X_repeat() : - * Same as HUF_compress1X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none. - * If it uses hufTable it does not modify hufTable or repeat. - * If it doesn't, it sets *repeat = HUF_repeat_none, and it sets hufTable to the table used. - * If preferRepeat then the old table will always be used if valid. */ -size_t HUF_compress1X_repeat(void* dst, size_t dstSize, - const void* src, size_t srcSize, - unsigned maxSymbolValue, unsigned tableLog, - void* workSpace, size_t wkspSize, /**< `workSpace` must be aligned on 4-bytes boundaries, `wkspSize` must be >= HUF_WORKSPACE_SIZE */ - HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2); - -size_t HUF_decompress1X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* single-symbol decoder */ -#ifndef HUF_FORCE_DECOMPRESS_X1 -size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* double-symbol decoder */ -#endif - -size_t HUF_decompress1X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); -size_t HUF_decompress1X_DCtx_wksp (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); -#ifndef HUF_FORCE_DECOMPRESS_X2 -size_t HUF_decompress1X1_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */ -size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< single-symbol decoder */ -#endif -#ifndef HUF_FORCE_DECOMPRESS_X1 -size_t HUF_decompress1X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */ -size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< double-symbols decoder */ -#endif - -size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); /**< automatic selection of sing or double symbol decoder, based on DTable */ -#ifndef HUF_FORCE_DECOMPRESS_X2 -size_t HUF_decompress1X1_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); -#endif -#ifndef HUF_FORCE_DECOMPRESS_X1 -size_t HUF_decompress1X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); -#endif - -/* BMI2 variants. - * If the CPU has BMI2 support, pass bmi2=1, otherwise pass bmi2=0. - */ -size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2); -#ifndef HUF_FORCE_DECOMPRESS_X2 -size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2); -#endif -size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2); -size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2); - -} - -#endif /* HUF_STATIC_LINKING_ONLY */ - - - -// LICENSE_CHANGE_END - - - - -/* ************************************************************** -* Error Management -****************************************************************/ -// #define HUF_isError ERR_isError -#define HUF_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c) /* use only *after* variable declarations */ - - -namespace duckdb_zstd { -/* ************************************************************** -* Utils -****************************************************************/ -unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue) -{ - return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 1); -} - - -/* ******************************************************* -* HUF : Huffman block compression -*********************************************************/ -/* HUF_compressWeights() : - * Same as FSE_compress(), but dedicated to huff0's weights compression. - * The use case needs much less stack memory. - * Note : all elements within weightTable are supposed to be <= HUF_TABLELOG_MAX. - */ -#define MAX_FSE_TABLELOG_FOR_HUFF_HEADER 6 -static size_t HUF_compressWeights (void* dst, size_t dstSize, const void* weightTable, size_t wtSize) -{ - BYTE* const ostart = (BYTE*) dst; - BYTE* op = ostart; - BYTE* const oend = ostart + dstSize; - - unsigned maxSymbolValue = HUF_TABLELOG_MAX; - U32 tableLog = MAX_FSE_TABLELOG_FOR_HUFF_HEADER; - - FSE_CTable CTable[FSE_CTABLE_SIZE_U32(MAX_FSE_TABLELOG_FOR_HUFF_HEADER, HUF_TABLELOG_MAX)]; - BYTE scratchBuffer[1< not compressible */ - } - - tableLog = FSE_optimalTableLog(tableLog, wtSize, maxSymbolValue); - CHECK_F( FSE_normalizeCount(norm, tableLog, count, wtSize, maxSymbolValue) ); - - /* Write table description header */ - { CHECK_V_F(hSize, FSE_writeNCount(op, (size_t)(oend-op), norm, maxSymbolValue, tableLog) ); - op += hSize; - } - - /* Compress */ - CHECK_F( FSE_buildCTable_wksp(CTable, norm, maxSymbolValue, tableLog, scratchBuffer, sizeof(scratchBuffer)) ); - { CHECK_V_F(cSize, FSE_compress_usingCTable(op, (size_t)(oend - op), weightTable, wtSize, CTable) ); - if (cSize == 0) return 0; /* not enough space for compressed data */ - op += cSize; - } - - return (size_t)(op-ostart); -} - - -struct HUF_CElt_s { - U16 val; - BYTE nbBits; -}; /* typedef'd to HUF_CElt within "zstd/common/huf.h" */ - -/*! HUF_writeCTable() : - `CTable` : Huffman tree to save, using huf representation. - @return : size of saved CTable */ -size_t HUF_writeCTable (void* dst, size_t maxDstSize, - const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog) -{ - BYTE bitsToWeight[HUF_TABLELOG_MAX + 1]; /* precomputed conversion table */ - BYTE huffWeight[HUF_SYMBOLVALUE_MAX]; - BYTE* op = (BYTE*)dst; - U32 n; - - /* check conditions */ - if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge); - - /* convert to weight */ - bitsToWeight[0] = 0; - for (n=1; n1) & (hSize < maxSymbolValue/2)) { /* FSE compressed */ - op[0] = (BYTE)hSize; - return hSize+1; - } } - - /* write raw values as 4-bits (max : 15) */ - if (maxSymbolValue > (256-128)) return ERROR(GENERIC); /* should not happen : likely means source cannot be compressed */ - if (((maxSymbolValue+1)/2) + 1 > maxDstSize) return ERROR(dstSize_tooSmall); /* not enough space within dst buffer */ - op[0] = (BYTE)(128 /*special case*/ + (maxSymbolValue-1)); - huffWeight[maxSymbolValue] = 0; /* to be sure it doesn't cause msan issue in final combination */ - for (n=0; n HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge); - if (nbSymbols > *maxSymbolValuePtr+1) return ERROR(maxSymbolValue_tooSmall); - - /* Prepare base value per rank */ - { U32 n, nextRankStart = 0; - for (n=1; n<=tableLog; n++) { - U32 current = nextRankStart; - nextRankStart += (rankVal[n] << (n-1)); - rankVal[n] = current; - } } - - /* fill nbBits */ - *hasZeroWeights = 0; - { U32 n; for (n=0; nn=tableLog+1 */ - U16 valPerRank[HUF_TABLELOG_MAX+2] = {0}; - { U32 n; for (n=0; n0; n--) { /* start at n=tablelog <-> w=1 */ - valPerRank[n] = min; /* get starting value within each rank */ - min += nbPerRank[n]; - min >>= 1; - } } - /* assign value within rank, symbol order */ - { U32 n; for (n=0; n maxNbBits */ - - /* there are several too large elements (at least >= 2) */ - { int totalCost = 0; - const U32 baseCost = 1 << (largestBits - maxNbBits); - int n = (int)lastNonNull; - - while (huffNode[n].nbBits > maxNbBits) { - totalCost += baseCost - (1 << (largestBits - huffNode[n].nbBits)); - huffNode[n].nbBits = (BYTE)maxNbBits; - n --; - } /* n stops at huffNode[n].nbBits <= maxNbBits */ - while (huffNode[n].nbBits == maxNbBits) n--; /* n end at index of smallest symbol using < maxNbBits */ - - /* renorm totalCost */ - totalCost >>= (largestBits - maxNbBits); /* note : totalCost is necessarily a multiple of baseCost */ - - /* repay normalized cost */ - { U32 const noSymbol = 0xF0F0F0F0; - U32 rankLast[HUF_TABLELOG_MAX+2]; - - /* Get pos of last (smallest) symbol per rank */ - memset(rankLast, 0xF0, sizeof(rankLast)); - { U32 currentNbBits = maxNbBits; - int pos; - for (pos=n ; pos >= 0; pos--) { - if (huffNode[pos].nbBits >= currentNbBits) continue; - currentNbBits = huffNode[pos].nbBits; /* < maxNbBits */ - rankLast[maxNbBits-currentNbBits] = (U32)pos; - } } - - while (totalCost > 0) { - U32 nBitsToDecrease = BIT_highbit32((U32)totalCost) + 1; - for ( ; nBitsToDecrease > 1; nBitsToDecrease--) { - U32 const highPos = rankLast[nBitsToDecrease]; - U32 const lowPos = rankLast[nBitsToDecrease-1]; - if (highPos == noSymbol) continue; - if (lowPos == noSymbol) break; - { U32 const highTotal = huffNode[highPos].count; - U32 const lowTotal = 2 * huffNode[lowPos].count; - if (highTotal <= lowTotal) break; - } } - /* only triggered when no more rank 1 symbol left => find closest one (note : there is necessarily at least one !) */ - /* HUF_MAX_TABLELOG test just to please gcc 5+; but it should not be necessary */ - while ((nBitsToDecrease<=HUF_TABLELOG_MAX) && (rankLast[nBitsToDecrease] == noSymbol)) - nBitsToDecrease ++; - totalCost -= 1 << (nBitsToDecrease-1); - if (rankLast[nBitsToDecrease-1] == noSymbol) - rankLast[nBitsToDecrease-1] = rankLast[nBitsToDecrease]; /* this rank is no longer empty */ - huffNode[rankLast[nBitsToDecrease]].nbBits ++; - if (rankLast[nBitsToDecrease] == 0) /* special case, reached largest symbol */ - rankLast[nBitsToDecrease] = noSymbol; - else { - rankLast[nBitsToDecrease]--; - if (huffNode[rankLast[nBitsToDecrease]].nbBits != maxNbBits-nBitsToDecrease) - rankLast[nBitsToDecrease] = noSymbol; /* this rank is now empty */ - } } /* while (totalCost > 0) */ - - while (totalCost < 0) { /* Sometimes, cost correction overshoot */ - if (rankLast[1] == noSymbol) { /* special case : no rank 1 symbol (using maxNbBits-1); let's create one from largest rank 0 (using maxNbBits) */ - while (huffNode[n].nbBits == maxNbBits) n--; - huffNode[n+1].nbBits--; - assert(n >= 0); - rankLast[1] = (U32)(n+1); - totalCost++; - continue; - } - huffNode[ rankLast[1] + 1 ].nbBits--; - rankLast[1]++; - totalCost ++; - } } } /* there are several too large elements (at least >= 2) */ - - return maxNbBits; -} - -typedef struct { - U32 base; - U32 current; -} rankPos; - -typedef nodeElt huffNodeTable[HUF_CTABLE_WORKSPACE_SIZE_U32]; - -#define RANK_POSITION_TABLE_SIZE 32 - -typedef struct { - huffNodeTable huffNodeTbl; - rankPos rankPosition[RANK_POSITION_TABLE_SIZE]; -} HUF_buildCTable_wksp_tables; - -static void HUF_sort(nodeElt* huffNode, const unsigned* count, U32 maxSymbolValue, rankPos* rankPosition) -{ - U32 n; - - memset(rankPosition, 0, sizeof(*rankPosition) * RANK_POSITION_TABLE_SIZE); - for (n=0; n<=maxSymbolValue; n++) { - U32 r = BIT_highbit32(count[n] + 1); - rankPosition[r].base ++; - } - for (n=30; n>0; n--) rankPosition[n-1].base += rankPosition[n].base; - for (n=0; n<32; n++) rankPosition[n].current = rankPosition[n].base; - for (n=0; n<=maxSymbolValue; n++) { - U32 const c = count[n]; - U32 const r = BIT_highbit32(c+1) + 1; - U32 pos = rankPosition[r].current++; - while ((pos > rankPosition[r].base) && (c > huffNode[pos-1].count)) { - huffNode[pos] = huffNode[pos-1]; - pos--; - } - huffNode[pos].count = c; - huffNode[pos].byte = (BYTE)n; - } -} - - -/** HUF_buildCTable_wksp() : - * Same as HUF_buildCTable(), but using externally allocated scratch buffer. - * `workSpace` must be aligned on 4-bytes boundaries, and be at least as large as sizeof(HUF_buildCTable_wksp_tables). - */ -#define STARTNODE (HUF_SYMBOLVALUE_MAX+1) - -size_t HUF_buildCTable_wksp (HUF_CElt* tree, const unsigned* count, U32 maxSymbolValue, U32 maxNbBits, void* workSpace, size_t wkspSize) -{ - HUF_buildCTable_wksp_tables* const wksp_tables = (HUF_buildCTable_wksp_tables*)workSpace; - nodeElt* const huffNode0 = wksp_tables->huffNodeTbl; - nodeElt* const huffNode = huffNode0+1; - int nonNullRank; - int lowS, lowN; - int nodeNb = STARTNODE; - int n, nodeRoot; - - /* safety checks */ - if (((size_t)workSpace & 3) != 0) return ERROR(GENERIC); /* must be aligned on 4-bytes boundaries */ - if (wkspSize < sizeof(HUF_buildCTable_wksp_tables)) - return ERROR(workSpace_tooSmall); - if (maxNbBits == 0) maxNbBits = HUF_TABLELOG_DEFAULT; - if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) - return ERROR(maxSymbolValue_tooLarge); - memset(huffNode0, 0, sizeof(huffNodeTable)); - - /* sort, decreasing order */ - HUF_sort(huffNode, count, maxSymbolValue, wksp_tables->rankPosition); - - /* init for parents */ - nonNullRank = (int)maxSymbolValue; - while(huffNode[nonNullRank].count == 0) nonNullRank--; - lowS = nonNullRank; nodeRoot = nodeNb + lowS - 1; lowN = nodeNb; - huffNode[nodeNb].count = huffNode[lowS].count + huffNode[lowS-1].count; - huffNode[lowS].parent = huffNode[lowS-1].parent = (U16)nodeNb; - nodeNb++; lowS-=2; - for (n=nodeNb; n<=nodeRoot; n++) huffNode[n].count = (U32)(1U<<30); - huffNode0[0].count = (U32)(1U<<31); /* fake entry, strong barrier */ - - /* create parents */ - while (nodeNb <= nodeRoot) { - int const n1 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++; - int const n2 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++; - huffNode[nodeNb].count = huffNode[n1].count + huffNode[n2].count; - huffNode[n1].parent = huffNode[n2].parent = (U16)nodeNb; - nodeNb++; - } - - /* distribute weights (unlimited tree height) */ - huffNode[nodeRoot].nbBits = 0; - for (n=nodeRoot-1; n>=STARTNODE; n--) - huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1; - for (n=0; n<=nonNullRank; n++) - huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1; - - /* enforce maxTableLog */ - maxNbBits = HUF_setMaxHeight(huffNode, (U32)nonNullRank, maxNbBits); - - /* fill result into tree (val, nbBits) */ - { U16 nbPerRank[HUF_TABLELOG_MAX+1] = {0}; - U16 valPerRank[HUF_TABLELOG_MAX+1] = {0}; - int const alphabetSize = (int)(maxSymbolValue + 1); - if (maxNbBits > HUF_TABLELOG_MAX) return ERROR(GENERIC); /* check fit into table */ - for (n=0; n<=nonNullRank; n++) - nbPerRank[huffNode[n].nbBits]++; - /* determine stating value per rank */ - { U16 min = 0; - for (n=(int)maxNbBits; n>0; n--) { - valPerRank[n] = min; /* get starting value within each rank */ - min += nbPerRank[n]; - min >>= 1; - } } - for (n=0; n> 3; -} - -int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue) { - int bad = 0; - int s; - for (s = 0; s <= (int)maxSymbolValue; ++s) { - bad |= (count[s] != 0) & (CTable[s].nbBits == 0); - } - return !bad; -} - -size_t HUF_compressBound(size_t size) { return HUF_COMPRESSBOUND(size); } - -FORCE_INLINE_TEMPLATE void -HUF_encodeSymbol(BIT_CStream_t* bitCPtr, U32 symbol, const HUF_CElt* CTable) -{ - BIT_addBitsFast(bitCPtr, CTable[symbol].val, CTable[symbol].nbBits); -} - -#define HUF_FLUSHBITS(s) BIT_flushBits(s) - -#define HUF_FLUSHBITS_1(stream) \ - if (sizeof((stream)->bitContainer)*8 < HUF_TABLELOG_MAX*2+7) HUF_FLUSHBITS(stream) - -#define HUF_FLUSHBITS_2(stream) \ - if (sizeof((stream)->bitContainer)*8 < HUF_TABLELOG_MAX*4+7) HUF_FLUSHBITS(stream) - -FORCE_INLINE_TEMPLATE size_t -HUF_compress1X_usingCTable_internal_body(void* dst, size_t dstSize, - const void* src, size_t srcSize, - const HUF_CElt* CTable) -{ - const BYTE* ip = (const BYTE*) src; - BYTE* const ostart = (BYTE*)dst; - BYTE* const oend = ostart + dstSize; - BYTE* op = ostart; - size_t n; - BIT_CStream_t bitC; - - /* init */ - if (dstSize < 8) return 0; /* not enough space to compress */ - { size_t const initErr = BIT_initCStream(&bitC, op, (size_t)(oend-op)); - if (HUF_isError(initErr)) return 0; } - - n = srcSize & ~3; /* join to mod 4 */ - switch (srcSize & 3) - { - case 3 : HUF_encodeSymbol(&bitC, ip[n+ 2], CTable); - HUF_FLUSHBITS_2(&bitC); - /* fall-through */ - case 2 : HUF_encodeSymbol(&bitC, ip[n+ 1], CTable); - HUF_FLUSHBITS_1(&bitC); - /* fall-through */ - case 1 : HUF_encodeSymbol(&bitC, ip[n+ 0], CTable); - HUF_FLUSHBITS(&bitC); - /* fall-through */ - case 0 : /* fall-through */ - default: break; - } - - for (; n>0; n-=4) { /* note : n&3==0 at this stage */ - HUF_encodeSymbol(&bitC, ip[n- 1], CTable); - HUF_FLUSHBITS_1(&bitC); - HUF_encodeSymbol(&bitC, ip[n- 2], CTable); - HUF_FLUSHBITS_2(&bitC); - HUF_encodeSymbol(&bitC, ip[n- 3], CTable); - HUF_FLUSHBITS_1(&bitC); - HUF_encodeSymbol(&bitC, ip[n- 4], CTable); - HUF_FLUSHBITS(&bitC); - } - - return BIT_closeCStream(&bitC); -} - -#if DYNAMIC_BMI2 - -static TARGET_ATTRIBUTE("bmi2") size_t -HUF_compress1X_usingCTable_internal_bmi2(void* dst, size_t dstSize, - const void* src, size_t srcSize, - const HUF_CElt* CTable) -{ - return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable); -} - -static size_t -HUF_compress1X_usingCTable_internal_default(void* dst, size_t dstSize, - const void* src, size_t srcSize, - const HUF_CElt* CTable) -{ - return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable); -} - -static size_t -HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize, - const void* src, size_t srcSize, - const HUF_CElt* CTable, const int bmi2) -{ - if (bmi2) { - return HUF_compress1X_usingCTable_internal_bmi2(dst, dstSize, src, srcSize, CTable); - } - return HUF_compress1X_usingCTable_internal_default(dst, dstSize, src, srcSize, CTable); -} - -#else - -static size_t -HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize, - const void* src, size_t srcSize, - const HUF_CElt* CTable, const int bmi2) -{ - (void)bmi2; - return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable); -} - -#endif - -size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable) -{ - return HUF_compress1X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0); -} - - -static size_t -HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize, - const void* src, size_t srcSize, - const HUF_CElt* CTable, int bmi2) -{ - size_t const segmentSize = (srcSize+3)/4; /* first 3 segments */ - const BYTE* ip = (const BYTE*) src; - const BYTE* const iend = ip + srcSize; - BYTE* const ostart = (BYTE*) dst; - BYTE* const oend = ostart + dstSize; - BYTE* op = ostart; - - if (dstSize < 6 + 1 + 1 + 1 + 8) return 0; /* minimum space to compress successfully */ - if (srcSize < 12) return 0; /* no saving possible : too small input */ - op += 6; /* jumpTable */ - - assert(op <= oend); - { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) ); - if (cSize==0) return 0; - assert(cSize <= 65535); - MEM_writeLE16(ostart, (U16)cSize); - op += cSize; - } - - ip += segmentSize; - assert(op <= oend); - { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) ); - if (cSize==0) return 0; - assert(cSize <= 65535); - MEM_writeLE16(ostart+2, (U16)cSize); - op += cSize; - } - - ip += segmentSize; - assert(op <= oend); - { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) ); - if (cSize==0) return 0; - assert(cSize <= 65535); - MEM_writeLE16(ostart+4, (U16)cSize); - op += cSize; - } - - ip += segmentSize; - assert(op <= oend); - assert(ip <= iend); - { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, (size_t)(iend-ip), CTable, bmi2) ); - if (cSize==0) return 0; - op += cSize; - } - - return (size_t)(op-ostart); -} - -size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable) -{ - return HUF_compress4X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0); -} - -typedef enum { HUF_singleStream, HUF_fourStreams } HUF_nbStreams_e; - -static size_t HUF_compressCTable_internal( - BYTE* const ostart, BYTE* op, BYTE* const oend, - const void* src, size_t srcSize, - HUF_nbStreams_e nbStreams, const HUF_CElt* CTable, const int bmi2) -{ - size_t const cSize = (nbStreams==HUF_singleStream) ? - HUF_compress1X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable, bmi2) : - HUF_compress4X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable, bmi2); - if (HUF_isError(cSize)) { return cSize; } - if (cSize==0) { return 0; } /* uncompressible */ - op += cSize; - /* check compressibility */ - assert(op >= ostart); - if ((size_t)(op-ostart) >= srcSize-1) { return 0; } - return (size_t)(op-ostart); -} - -typedef struct { - unsigned count[HUF_SYMBOLVALUE_MAX + 1]; - HUF_CElt CTable[HUF_SYMBOLVALUE_MAX + 1]; - HUF_buildCTable_wksp_tables buildCTable_wksp; -} HUF_compress_tables_t; - -/* HUF_compress_internal() : - * `workSpace` must a table of at least HUF_WORKSPACE_SIZE_U32 unsigned */ -static size_t -HUF_compress_internal (void* dst, size_t dstSize, - const void* src, size_t srcSize, - unsigned maxSymbolValue, unsigned huffLog, - HUF_nbStreams_e nbStreams, - void* workSpace, size_t wkspSize, - HUF_CElt* oldHufTable, HUF_repeat* repeat, int preferRepeat, - const int bmi2) -{ - HUF_compress_tables_t* const table = (HUF_compress_tables_t*)workSpace; - BYTE* const ostart = (BYTE*)dst; - BYTE* const oend = ostart + dstSize; - BYTE* op = ostart; - - HUF_STATIC_ASSERT(sizeof(*table) <= HUF_WORKSPACE_SIZE); - - /* checks & inits */ - if (((size_t)workSpace & 3) != 0) return ERROR(GENERIC); /* must be aligned on 4-bytes boundaries */ - if (wkspSize < HUF_WORKSPACE_SIZE) return ERROR(workSpace_tooSmall); - if (!srcSize) return 0; /* Uncompressed */ - if (!dstSize) return 0; /* cannot fit anything within dst budget */ - if (srcSize > HUF_BLOCKSIZE_MAX) return ERROR(srcSize_wrong); /* current block size limit */ - if (huffLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge); - if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge); - if (!maxSymbolValue) maxSymbolValue = HUF_SYMBOLVALUE_MAX; - if (!huffLog) huffLog = HUF_TABLELOG_DEFAULT; - - /* Heuristic : If old table is valid, use it for small inputs */ - if (preferRepeat && repeat && *repeat == HUF_repeat_valid) { - return HUF_compressCTable_internal(ostart, op, oend, - src, srcSize, - nbStreams, oldHufTable, bmi2); - } - - /* Scan input and build symbol stats */ - { CHECK_V_F(largest, HIST_count_wksp (table->count, &maxSymbolValue, (const BYTE*)src, srcSize, workSpace, wkspSize) ); - if (largest == srcSize) { *ostart = ((const BYTE*)src)[0]; return 1; } /* single symbol, rle */ - if (largest <= (srcSize >> 7)+4) return 0; /* heuristic : probably not compressible enough */ - } - - /* Check validity of previous table */ - if ( repeat - && *repeat == HUF_repeat_check - && !HUF_validateCTable(oldHufTable, table->count, maxSymbolValue)) { - *repeat = HUF_repeat_none; - } - /* Heuristic : use existing table for small inputs */ - if (preferRepeat && repeat && *repeat != HUF_repeat_none) { - return HUF_compressCTable_internal(ostart, op, oend, - src, srcSize, - nbStreams, oldHufTable, bmi2); - } - - /* Build Huffman Tree */ - huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue); - { size_t const maxBits = HUF_buildCTable_wksp(table->CTable, table->count, - maxSymbolValue, huffLog, - &table->buildCTable_wksp, sizeof(table->buildCTable_wksp)); - CHECK_F(maxBits); - huffLog = (U32)maxBits; - /* Zero unused symbols in CTable, so we can check it for validity */ - memset(table->CTable + (maxSymbolValue + 1), 0, - sizeof(table->CTable) - ((maxSymbolValue + 1) * sizeof(HUF_CElt))); - } - - /* Write table description header */ - { CHECK_V_F(hSize, HUF_writeCTable (op, dstSize, table->CTable, maxSymbolValue, huffLog) ); - /* Check if using previous huffman table is beneficial */ - if (repeat && *repeat != HUF_repeat_none) { - size_t const oldSize = HUF_estimateCompressedSize(oldHufTable, table->count, maxSymbolValue); - size_t const newSize = HUF_estimateCompressedSize(table->CTable, table->count, maxSymbolValue); - if (oldSize <= hSize + newSize || hSize + 12 >= srcSize) { - return HUF_compressCTable_internal(ostart, op, oend, - src, srcSize, - nbStreams, oldHufTable, bmi2); - } } - - /* Use the new huffman table */ - if (hSize + 12ul >= srcSize) { return 0; } - op += hSize; - if (repeat) { *repeat = HUF_repeat_none; } - if (oldHufTable) - memcpy(oldHufTable, table->CTable, sizeof(table->CTable)); /* Save new table */ - } - return HUF_compressCTable_internal(ostart, op, oend, - src, srcSize, - nbStreams, table->CTable, bmi2); -} - - -size_t HUF_compress1X_wksp (void* dst, size_t dstSize, - const void* src, size_t srcSize, - unsigned maxSymbolValue, unsigned huffLog, - void* workSpace, size_t wkspSize) -{ - return HUF_compress_internal(dst, dstSize, src, srcSize, - maxSymbolValue, huffLog, HUF_singleStream, - workSpace, wkspSize, - NULL, NULL, 0, 0 /*bmi2*/); -} - -size_t HUF_compress1X_repeat (void* dst, size_t dstSize, - const void* src, size_t srcSize, - unsigned maxSymbolValue, unsigned huffLog, - void* workSpace, size_t wkspSize, - HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2) -{ - return HUF_compress_internal(dst, dstSize, src, srcSize, - maxSymbolValue, huffLog, HUF_singleStream, - workSpace, wkspSize, hufTable, - repeat, preferRepeat, bmi2); -} - -size_t HUF_compress1X (void* dst, size_t dstSize, - const void* src, size_t srcSize, - unsigned maxSymbolValue, unsigned huffLog) -{ - unsigned workSpace[HUF_WORKSPACE_SIZE_U32]; - return HUF_compress1X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(workSpace)); -} - -/* HUF_compress4X_repeat(): - * compress input using 4 streams. - * provide workspace to generate compression tables */ -size_t HUF_compress4X_wksp (void* dst, size_t dstSize, - const void* src, size_t srcSize, - unsigned maxSymbolValue, unsigned huffLog, - void* workSpace, size_t wkspSize) -{ - return HUF_compress_internal(dst, dstSize, src, srcSize, - maxSymbolValue, huffLog, HUF_fourStreams, - workSpace, wkspSize, - NULL, NULL, 0, 0 /*bmi2*/); -} - -/* HUF_compress4X_repeat(): - * compress input using 4 streams. - * re-use an existing huffman compression table */ -size_t HUF_compress4X_repeat (void* dst, size_t dstSize, - const void* src, size_t srcSize, - unsigned maxSymbolValue, unsigned huffLog, - void* workSpace, size_t wkspSize, - HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2) -{ - return HUF_compress_internal(dst, dstSize, src, srcSize, - maxSymbolValue, huffLog, HUF_fourStreams, - workSpace, wkspSize, - hufTable, repeat, preferRepeat, bmi2); -} - -size_t HUF_compress2 (void* dst, size_t dstSize, - const void* src, size_t srcSize, - unsigned maxSymbolValue, unsigned huffLog) -{ - unsigned workSpace[HUF_WORKSPACE_SIZE_U32]; - return HUF_compress4X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(workSpace)); -} - -size_t HUF_compress (void* dst, size_t maxDstSize, const void* src, size_t srcSize) -{ - return HUF_compress2(dst, maxDstSize, src, srcSize, 255, HUF_TABLELOG_DEFAULT); -} - -} - - -// LICENSE_CHANGE_END - - -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #4 -// See the end of this file for a list - -/* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. - */ - -/*-************************************* -* Dependencies -***************************************/ -#include /* INT_MAX */ -#include /* memset */ - - /* HIST_countFast_wksp */ - - - - - - -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #4 -// See the end of this file for a list - -/* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. - */ - -/* This header contains definitions - * that shall **only** be used by modules within lib/compress. - */ - -#ifndef ZSTD_COMPRESS_H -#define ZSTD_COMPRESS_H - -/*-************************************* -* Dependencies -***************************************/ - - -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #4 -// See the end of this file for a list - -/* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. - */ - -#ifndef ZSTD_CCOMMON_H_MODULE -#define ZSTD_CCOMMON_H_MODULE - -/* this module contains definitions which must be identical - * across compression, decompression and dictBuilder. - * It also contains a few functions useful to at least 2 of them - * and which benefit from being inlined */ - -/*-************************************* -* Dependencies -***************************************/ -#ifdef __aarch64__ -#include -#endif - - - /* assert, DEBUGLOG, RAWLOG, g_debuglevel */ - - - -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #4 -// See the end of this file for a list - -/* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. - */ -#ifndef ZSTD_H_235446 -#define ZSTD_H_235446 - -/* ====== Dependency ======*/ -#include /* INT_MAX */ -#include /* size_t */ - - -/* ===== ZSTDLIB_API : control library symbols visibility ===== */ -#ifndef ZSTDLIB_VISIBILITY -# if defined(__GNUC__) && (__GNUC__ >= 4) -# define ZSTDLIB_VISIBILITY __attribute__ ((visibility ("default"))) -# else -# define ZSTDLIB_VISIBILITY -# endif -#endif -#if defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1) -# define ZSTDLIB_API __declspec(dllexport) ZSTDLIB_VISIBILITY -#elif defined(ZSTD_DLL_IMPORT) && (ZSTD_DLL_IMPORT==1) -# define ZSTDLIB_API __declspec(dllimport) ZSTDLIB_VISIBILITY /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/ -#else -# define ZSTDLIB_API ZSTDLIB_VISIBILITY -#endif - -namespace duckdb_zstd { - -/******************************************************************************* - Introduction - - zstd, short for Zstandard, is a fast lossless compression algorithm, targeting - real-time compression scenarios at zlib-level and better compression ratios. - The zstd compression library provides in-memory compression and decompression - functions. - - The library supports regular compression levels from 1 up to ZSTD_maxCLevel(), - which is currently 22. Levels >= 20, labeled `--ultra`, should be used with - caution, as they require more memory. The library also offers negative - compression levels, which extend the range of speed vs. ratio preferences. - The lower the level, the faster the speed (at the cost of compression). - - Compression can be done in: - - a single step (described as Simple API) - - a single step, reusing a context (described as Explicit context) - - unbounded multiple steps (described as Streaming compression) - - The compression ratio achievable on small data can be highly improved using - a dictionary. Dictionary compression can be performed in: - - a single step (described as Simple dictionary API) - - a single step, reusing a dictionary (described as Bulk-processing - dictionary API) - - Advanced experimental functions can be accessed using - `#define ZSTD_STATIC_LINKING_ONLY` before including zstd.h. - - Advanced experimental APIs should never be used with a dynamically-linked - library. They are not "stable"; their definitions or signatures may change in - the future. Only static linking is allowed. -*******************************************************************************/ - -/*------ Version ------*/ -#define ZSTD_VERSION_MAJOR 1 -#define ZSTD_VERSION_MINOR 4 -#define ZSTD_VERSION_RELEASE 5 - -#define ZSTD_VERSION_NUMBER (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE) -ZSTDLIB_API unsigned ZSTD_versionNumber(void); /**< to check runtime library version */ - -#define ZSTD_LIB_VERSION ZSTD_VERSION_MAJOR.ZSTD_VERSION_MINOR.ZSTD_VERSION_RELEASE -#define ZSTD_QUOTE(str) #str -#define ZSTD_EXPAND_AND_QUOTE(str) ZSTD_QUOTE(str) -#define ZSTD_VERSION_STRING ZSTD_EXPAND_AND_QUOTE(ZSTD_LIB_VERSION) -ZSTDLIB_API const char* ZSTD_versionString(void); /* requires v1.3.0+ */ - -/* ************************************* - * Default constant - ***************************************/ -#ifndef ZSTD_CLEVEL_DEFAULT -# define ZSTD_CLEVEL_DEFAULT 3 -#endif - -/* ************************************* - * Constants - ***************************************/ - -/* All magic numbers are supposed read/written to/from files/memory using little-endian convention */ -#define ZSTD_MAGICNUMBER 0xFD2FB528 /* valid since v0.8.0 */ -#define ZSTD_MAGIC_DICTIONARY 0xEC30A437 /* valid since v0.7.0 */ -#define ZSTD_MAGIC_SKIPPABLE_START 0x184D2A50 /* all 16 values, from 0x184D2A50 to 0x184D2A5F, signal the beginning of a skippable frame */ -#define ZSTD_MAGIC_SKIPPABLE_MASK 0xFFFFFFF0 - -#define ZSTD_BLOCKSIZELOG_MAX 17 -#define ZSTD_BLOCKSIZE_MAX (1<= `ZSTD_compressBound(srcSize)`. - * @return : compressed size written into `dst` (<= `dstCapacity), - * or an error code if it fails (which can be tested using ZSTD_isError()). */ -ZSTDLIB_API size_t ZSTD_compress( void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - int compressionLevel); - -/*! ZSTD_decompress() : - * `compressedSize` : must be the _exact_ size of some number of compressed and/or skippable frames. - * `dstCapacity` is an upper bound of originalSize to regenerate. - * If user cannot imply a maximum upper bound, it's better to use streaming mode to decompress data. - * @return : the number of bytes decompressed into `dst` (<= `dstCapacity`), - * or an errorCode if it fails (which can be tested using ZSTD_isError()). */ -ZSTDLIB_API size_t ZSTD_decompress( void* dst, size_t dstCapacity, - const void* src, size_t compressedSize); - -/*! ZSTD_getFrameContentSize() : requires v1.3.0+ - * `src` should point to the start of a ZSTD encoded frame. - * `srcSize` must be at least as large as the frame header. - * hint : any size >= `ZSTD_frameHeaderSize_max` is large enough. - * @return : - decompressed size of `src` frame content, if known - * - ZSTD_CONTENTSIZE_UNKNOWN if the size cannot be determined - * - ZSTD_CONTENTSIZE_ERROR if an error occurred (e.g. invalid magic number, srcSize too small) - * note 1 : a 0 return value means the frame is valid but "empty". - * note 2 : decompressed size is an optional field, it may not be present, typically in streaming mode. - * When `return==ZSTD_CONTENTSIZE_UNKNOWN`, data to decompress could be any size. - * In which case, it's necessary to use streaming mode to decompress data. - * Optionally, application can rely on some implicit limit, - * as ZSTD_decompress() only needs an upper bound of decompressed size. - * (For example, data could be necessarily cut into blocks <= 16 KB). - * note 3 : decompressed size is always present when compression is completed using single-pass functions, - * such as ZSTD_compress(), ZSTD_compressCCtx() ZSTD_compress_usingDict() or ZSTD_compress_usingCDict(). - * note 4 : decompressed size can be very large (64-bits value), - * potentially larger than what local system can handle as a single memory segment. - * In which case, it's necessary to use streaming mode to decompress data. - * note 5 : If source is untrusted, decompressed size could be wrong or intentionally modified. - * Always ensure return value fits within application's authorized limits. - * Each application can set its own limits. - * note 6 : This function replaces ZSTD_getDecompressedSize() */ -#define ZSTD_CONTENTSIZE_UNKNOWN (0ULL - 1) -#define ZSTD_CONTENTSIZE_ERROR (0ULL - 2) -ZSTDLIB_API unsigned long long ZSTD_getFrameContentSize(const void *src, size_t srcSize); - -/*! ZSTD_getDecompressedSize() : - * NOTE: This function is now obsolete, in favor of ZSTD_getFrameContentSize(). - * Both functions work the same way, but ZSTD_getDecompressedSize() blends - * "empty", "unknown" and "error" results to the same return value (0), - * while ZSTD_getFrameContentSize() gives them separate return values. - * @return : decompressed size of `src` frame content _if known and not empty_, 0 otherwise. */ -ZSTDLIB_API unsigned long long ZSTD_getDecompressedSize(const void* src, size_t srcSize); - -/*! ZSTD_findFrameCompressedSize() : - * `src` should point to the start of a ZSTD frame or skippable frame. - * `srcSize` must be >= first frame size - * @return : the compressed size of the first frame starting at `src`, - * suitable to pass as `srcSize` to `ZSTD_decompress` or similar, - * or an error code if input is invalid */ -ZSTDLIB_API size_t ZSTD_findFrameCompressedSize(const void* src, size_t srcSize); - - -/*====== Helper functions ======*/ -#define ZSTD_COMPRESSBOUND(srcSize) ((srcSize) + ((srcSize)>>8) + (((srcSize) < (128<<10)) ? (((128<<10) - (srcSize)) >> 11) /* margin, from 64 to 0 */ : 0)) /* this formula ensures that bound(A) + bound(B) <= bound(A+B) as long as A and B >= 128 KB */ -ZSTDLIB_API size_t ZSTD_compressBound(size_t srcSize); /*!< maximum compressed size in worst case single-pass scenario */ -ZSTDLIB_API unsigned ZSTD_isError(size_t code); /*!< tells if a `size_t` function result is an error code */ -ZSTDLIB_API const char* ZSTD_getErrorName(size_t code); /*!< provides readable string from an error code */ -ZSTDLIB_API int ZSTD_minCLevel(void); /*!< minimum negative compression level allowed */ -ZSTDLIB_API int ZSTD_maxCLevel(void); /*!< maximum compression level available */ - - -/*************************************** -* Explicit context -***************************************/ -/*= Compression context - * When compressing many times, - * it is recommended to allocate a context just once, - * and re-use it for each successive compression operation. - * This will make workload friendlier for system's memory. - * Note : re-using context is just a speed / resource optimization. - * It doesn't change the compression ratio, which remains identical. - * Note 2 : In multi-threaded environments, - * use one different context per thread for parallel execution. - */ -typedef struct ZSTD_CCtx_s ZSTD_CCtx; -ZSTDLIB_API ZSTD_CCtx* ZSTD_createCCtx(void); -ZSTDLIB_API size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx); - -/*! ZSTD_compressCCtx() : - * Same as ZSTD_compress(), using an explicit ZSTD_CCtx. - * Important : in order to behave similarly to `ZSTD_compress()`, - * this function compresses at requested compression level, - * __ignoring any other parameter__ . - * If any advanced parameter was set using the advanced API, - * they will all be reset. Only `compressionLevel` remains. - */ -ZSTDLIB_API size_t ZSTD_compressCCtx(ZSTD_CCtx* cctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - int compressionLevel); - -/*= Decompression context - * When decompressing many times, - * it is recommended to allocate a context only once, - * and re-use it for each successive compression operation. - * This will make workload friendlier for system's memory. - * Use one context per thread for parallel execution. */ -typedef struct ZSTD_DCtx_s ZSTD_DCtx; -ZSTDLIB_API ZSTD_DCtx* ZSTD_createDCtx(void); -ZSTDLIB_API size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx); - -/*! ZSTD_decompressDCtx() : - * Same as ZSTD_decompress(), - * requires an allocated ZSTD_DCtx. - * Compatible with sticky parameters. - */ -ZSTDLIB_API size_t ZSTD_decompressDCtx(ZSTD_DCtx* dctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize); - - -/*************************************** -* Advanced compression API -***************************************/ - -/* API design : - * Parameters are pushed one by one into an existing context, - * using ZSTD_CCtx_set*() functions. - * Pushed parameters are sticky : they are valid for next compressed frame, and any subsequent frame. - * "sticky" parameters are applicable to `ZSTD_compress2()` and `ZSTD_compressStream*()` ! - * __They do not apply to "simple" one-shot variants such as ZSTD_compressCCtx()__ . - * - * It's possible to reset all parameters to "default" using ZSTD_CCtx_reset(). - * - * This API supercedes all other "advanced" API entry points in the experimental section. - * In the future, we expect to remove from experimental API entry points which are redundant with this API. - */ - - -/* Compression strategies, listed from fastest to strongest */ -typedef enum { ZSTD_fast=1, - ZSTD_dfast=2, - ZSTD_greedy=3, - ZSTD_lazy=4, - ZSTD_lazy2=5, - ZSTD_btlazy2=6, - ZSTD_btopt=7, - ZSTD_btultra=8, - ZSTD_btultra2=9 - /* note : new strategies _might_ be added in the future. - Only the order (from fast to strong) is guaranteed */ -} ZSTD_strategy; - - -typedef enum { - - /* compression parameters - * Note: When compressing with a ZSTD_CDict these parameters are superseded - * by the parameters used to construct the ZSTD_CDict. - * See ZSTD_CCtx_refCDict() for more info (superseded-by-cdict). */ - ZSTD_c_compressionLevel=100, /* Set compression parameters according to pre-defined cLevel table. - * Note that exact compression parameters are dynamically determined, - * depending on both compression level and srcSize (when known). - * Default level is ZSTD_CLEVEL_DEFAULT==3. - * Special: value 0 means default, which is controlled by ZSTD_CLEVEL_DEFAULT. - * Note 1 : it's possible to pass a negative compression level. - * Note 2 : setting a level does not automatically set all other compression parameters - * to default. Setting this will however eventually dynamically impact the compression - * parameters which have not been manually set. The manually set - * ones will 'stick'. */ - /* Advanced compression parameters : - * It's possible to pin down compression parameters to some specific values. - * In which case, these values are no longer dynamically selected by the compressor */ - ZSTD_c_windowLog=101, /* Maximum allowed back-reference distance, expressed as power of 2. - * This will set a memory budget for streaming decompression, - * with larger values requiring more memory - * and typically compressing more. - * Must be clamped between ZSTD_WINDOWLOG_MIN and ZSTD_WINDOWLOG_MAX. - * Special: value 0 means "use default windowLog". - * Note: Using a windowLog greater than ZSTD_WINDOWLOG_LIMIT_DEFAULT - * requires explicitly allowing such size at streaming decompression stage. */ - ZSTD_c_hashLog=102, /* Size of the initial probe table, as a power of 2. - * Resulting memory usage is (1 << (hashLog+2)). - * Must be clamped between ZSTD_HASHLOG_MIN and ZSTD_HASHLOG_MAX. - * Larger tables improve compression ratio of strategies <= dFast, - * and improve speed of strategies > dFast. - * Special: value 0 means "use default hashLog". */ - ZSTD_c_chainLog=103, /* Size of the multi-probe search table, as a power of 2. - * Resulting memory usage is (1 << (chainLog+2)). - * Must be clamped between ZSTD_CHAINLOG_MIN and ZSTD_CHAINLOG_MAX. - * Larger tables result in better and slower compression. - * This parameter is useless for "fast" strategy. - * It's still useful when using "dfast" strategy, - * in which case it defines a secondary probe table. - * Special: value 0 means "use default chainLog". */ - ZSTD_c_searchLog=104, /* Number of search attempts, as a power of 2. - * More attempts result in better and slower compression. - * This parameter is useless for "fast" and "dFast" strategies. - * Special: value 0 means "use default searchLog". */ - ZSTD_c_minMatch=105, /* Minimum size of searched matches. - * Note that Zstandard can still find matches of smaller size, - * it just tweaks its search algorithm to look for this size and larger. - * Larger values increase compression and decompression speed, but decrease ratio. - * Must be clamped between ZSTD_MINMATCH_MIN and ZSTD_MINMATCH_MAX. - * Note that currently, for all strategies < btopt, effective minimum is 4. - * , for all strategies > fast, effective maximum is 6. - * Special: value 0 means "use default minMatchLength". */ - ZSTD_c_targetLength=106, /* Impact of this field depends on strategy. - * For strategies btopt, btultra & btultra2: - * Length of Match considered "good enough" to stop search. - * Larger values make compression stronger, and slower. - * For strategy fast: - * Distance between match sampling. - * Larger values make compression faster, and weaker. - * Special: value 0 means "use default targetLength". */ - ZSTD_c_strategy=107, /* See ZSTD_strategy enum definition. - * The higher the value of selected strategy, the more complex it is, - * resulting in stronger and slower compression. - * Special: value 0 means "use default strategy". */ - - /* LDM mode parameters */ - ZSTD_c_enableLongDistanceMatching=160, /* Enable long distance matching. - * This parameter is designed to improve compression ratio - * for large inputs, by finding large matches at long distance. - * It increases memory usage and window size. - * Note: enabling this parameter increases default ZSTD_c_windowLog to 128 MB - * except when expressly set to a different value. */ - ZSTD_c_ldmHashLog=161, /* Size of the table for long distance matching, as a power of 2. - * Larger values increase memory usage and compression ratio, - * but decrease compression speed. - * Must be clamped between ZSTD_HASHLOG_MIN and ZSTD_HASHLOG_MAX - * default: windowlog - 7. - * Special: value 0 means "automatically determine hashlog". */ - ZSTD_c_ldmMinMatch=162, /* Minimum match size for long distance matcher. - * Larger/too small values usually decrease compression ratio. - * Must be clamped between ZSTD_LDM_MINMATCH_MIN and ZSTD_LDM_MINMATCH_MAX. - * Special: value 0 means "use default value" (default: 64). */ - ZSTD_c_ldmBucketSizeLog=163, /* Log size of each bucket in the LDM hash table for collision resolution. - * Larger values improve collision resolution but decrease compression speed. - * The maximum value is ZSTD_LDM_BUCKETSIZELOG_MAX. - * Special: value 0 means "use default value" (default: 3). */ - ZSTD_c_ldmHashRateLog=164, /* Frequency of inserting/looking up entries into the LDM hash table. - * Must be clamped between 0 and (ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN). - * Default is MAX(0, (windowLog - ldmHashLog)), optimizing hash table usage. - * Larger values improve compression speed. - * Deviating far from default value will likely result in a compression ratio decrease. - * Special: value 0 means "automatically determine hashRateLog". */ - - /* frame parameters */ - ZSTD_c_contentSizeFlag=200, /* Content size will be written into frame header _whenever known_ (default:1) - * Content size must be known at the beginning of compression. - * This is automatically the case when using ZSTD_compress2(), - * For streaming scenarios, content size must be provided with ZSTD_CCtx_setPledgedSrcSize() */ - ZSTD_c_checksumFlag=201, /* A 32-bits checksum of content is written at end of frame (default:0) */ - ZSTD_c_dictIDFlag=202, /* When applicable, dictionary's ID is written into frame header (default:1) */ - - /* multi-threading parameters */ - /* These parameters are only useful if multi-threading is enabled (compiled with build macro ZSTD_MULTITHREAD). - * They return an error otherwise. */ - ZSTD_c_nbWorkers=400, /* Select how many threads will be spawned to compress in parallel. - * When nbWorkers >= 1, triggers asynchronous mode when used with ZSTD_compressStream*() : - * ZSTD_compressStream*() consumes input and flush output if possible, but immediately gives back control to caller, - * while compression work is performed in parallel, within worker threads. - * (note : a strong exception to this rule is when first invocation of ZSTD_compressStream2() sets ZSTD_e_end : - * in which case, ZSTD_compressStream2() delegates to ZSTD_compress2(), which is always a blocking call). - * More workers improve speed, but also increase memory usage. - * Default value is `0`, aka "single-threaded mode" : no worker is spawned, compression is performed inside Caller's thread, all invocations are blocking */ - ZSTD_c_jobSize=401, /* Size of a compression job. This value is enforced only when nbWorkers >= 1. - * Each compression job is completed in parallel, so this value can indirectly impact the nb of active threads. - * 0 means default, which is dynamically determined based on compression parameters. - * Job size must be a minimum of overlap size, or 1 MB, whichever is largest. - * The minimum size is automatically and transparently enforced. */ - ZSTD_c_overlapLog=402, /* Control the overlap size, as a fraction of window size. - * The overlap size is an amount of data reloaded from previous job at the beginning of a new job. - * It helps preserve compression ratio, while each job is compressed in parallel. - * This value is enforced only when nbWorkers >= 1. - * Larger values increase compression ratio, but decrease speed. - * Possible values range from 0 to 9 : - * - 0 means "default" : value will be determined by the library, depending on strategy - * - 1 means "no overlap" - * - 9 means "full overlap", using a full window size. - * Each intermediate rank increases/decreases load size by a factor 2 : - * 9: full window; 8: w/2; 7: w/4; 6: w/8; 5:w/16; 4: w/32; 3:w/64; 2:w/128; 1:no overlap; 0:default - * default value varies between 6 and 9, depending on strategy */ - - /* note : additional experimental parameters are also available - * within the experimental section of the API. - * At the time of this writing, they include : - * ZSTD_c_rsyncable - * ZSTD_c_format - * ZSTD_c_forceMaxWindow - * ZSTD_c_forceAttachDict - * ZSTD_c_literalCompressionMode - * ZSTD_c_targetCBlockSize - * ZSTD_c_srcSizeHint - * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them. - * note : never ever use experimentalParam? names directly; - * also, the enums values themselves are unstable and can still change. - */ - ZSTD_c_experimentalParam1=500, - ZSTD_c_experimentalParam2=10, - ZSTD_c_experimentalParam3=1000, - ZSTD_c_experimentalParam4=1001, - ZSTD_c_experimentalParam5=1002, - ZSTD_c_experimentalParam6=1003, - ZSTD_c_experimentalParam7=1004 -} ZSTD_cParameter; - -typedef struct { - size_t error; - int lowerBound; - int upperBound; -} ZSTD_bounds; - -/*! ZSTD_cParam_getBounds() : - * All parameters must belong to an interval with lower and upper bounds, - * otherwise they will either trigger an error or be automatically clamped. - * @return : a structure, ZSTD_bounds, which contains - * - an error status field, which must be tested using ZSTD_isError() - * - lower and upper bounds, both inclusive - */ -ZSTDLIB_API ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter cParam); - -/*! ZSTD_CCtx_setParameter() : - * Set one compression parameter, selected by enum ZSTD_cParameter. - * All parameters have valid bounds. Bounds can be queried using ZSTD_cParam_getBounds(). - * Providing a value beyond bound will either clamp it, or trigger an error (depending on parameter). - * Setting a parameter is generally only possible during frame initialization (before starting compression). - * Exception : when using multi-threading mode (nbWorkers >= 1), - * the following parameters can be updated _during_ compression (within same frame): - * => compressionLevel, hashLog, chainLog, searchLog, minMatch, targetLength and strategy. - * new parameters will be active for next job only (after a flush()). - * @return : an error code (which can be tested using ZSTD_isError()). - */ -ZSTDLIB_API size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value); - -/*! ZSTD_CCtx_setPledgedSrcSize() : - * Total input data size to be compressed as a single frame. - * Value will be written in frame header, unless if explicitly forbidden using ZSTD_c_contentSizeFlag. - * This value will also be controlled at end of frame, and trigger an error if not respected. - * @result : 0, or an error code (which can be tested with ZSTD_isError()). - * Note 1 : pledgedSrcSize==0 actually means zero, aka an empty frame. - * In order to mean "unknown content size", pass constant ZSTD_CONTENTSIZE_UNKNOWN. - * ZSTD_CONTENTSIZE_UNKNOWN is default value for any new frame. - * Note 2 : pledgedSrcSize is only valid once, for the next frame. - * It's discarded at the end of the frame, and replaced by ZSTD_CONTENTSIZE_UNKNOWN. - * Note 3 : Whenever all input data is provided and consumed in a single round, - * for example with ZSTD_compress2(), - * or invoking immediately ZSTD_compressStream2(,,,ZSTD_e_end), - * this value is automatically overridden by srcSize instead. - */ -ZSTDLIB_API size_t ZSTD_CCtx_setPledgedSrcSize(ZSTD_CCtx* cctx, unsigned long long pledgedSrcSize); - -typedef enum { - ZSTD_reset_session_only = 1, - ZSTD_reset_parameters = 2, - ZSTD_reset_session_and_parameters = 3 -} ZSTD_ResetDirective; - -/*! ZSTD_CCtx_reset() : - * There are 2 different things that can be reset, independently or jointly : - * - The session : will stop compressing current frame, and make CCtx ready to start a new one. - * Useful after an error, or to interrupt any ongoing compression. - * Any internal data not yet flushed is cancelled. - * Compression parameters and dictionary remain unchanged. - * They will be used to compress next frame. - * Resetting session never fails. - * - The parameters : changes all parameters back to "default". - * This removes any reference to any dictionary too. - * Parameters can only be changed between 2 sessions (i.e. no compression is currently ongoing) - * otherwise the reset fails, and function returns an error value (which can be tested using ZSTD_isError()) - * - Both : similar to resetting the session, followed by resetting parameters. - */ -ZSTDLIB_API size_t ZSTD_CCtx_reset(ZSTD_CCtx* cctx, ZSTD_ResetDirective reset); - -/*! ZSTD_compress2() : - * Behave the same as ZSTD_compressCCtx(), but compression parameters are set using the advanced API. - * ZSTD_compress2() always starts a new frame. - * Should cctx hold data from a previously unfinished frame, everything about it is forgotten. - * - Compression parameters are pushed into CCtx before starting compression, using ZSTD_CCtx_set*() - * - The function is always blocking, returns when compression is completed. - * Hint : compression runs faster if `dstCapacity` >= `ZSTD_compressBound(srcSize)`. - * @return : compressed size written into `dst` (<= `dstCapacity), - * or an error code if it fails (which can be tested using ZSTD_isError()). - */ -ZSTDLIB_API size_t ZSTD_compress2( ZSTD_CCtx* cctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize); - - -/*************************************** -* Advanced decompression API -***************************************/ - -/* The advanced API pushes parameters one by one into an existing DCtx context. - * Parameters are sticky, and remain valid for all following frames - * using the same DCtx context. - * It's possible to reset parameters to default values using ZSTD_DCtx_reset(). - * Note : This API is compatible with existing ZSTD_decompressDCtx() and ZSTD_decompressStream(). - * Therefore, no new decompression function is necessary. - */ - -typedef enum { - - ZSTD_d_windowLogMax=100, /* Select a size limit (in power of 2) beyond which - * the streaming API will refuse to allocate memory buffer - * in order to protect the host from unreasonable memory requirements. - * This parameter is only useful in streaming mode, since no internal buffer is allocated in single-pass mode. - * By default, a decompression context accepts window sizes <= (1 << ZSTD_WINDOWLOG_LIMIT_DEFAULT). - * Special: value 0 means "use default maximum windowLog". */ - - /* note : additional experimental parameters are also available - * within the experimental section of the API. - * At the time of this writing, they include : - * ZSTD_d_format - * ZSTD_d_stableOutBuffer - * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them. - * note : never ever use experimentalParam? names directly - */ - ZSTD_d_experimentalParam1=1000, - ZSTD_d_experimentalParam2=1001 - -} ZSTD_dParameter; - -/*! ZSTD_dParam_getBounds() : - * All parameters must belong to an interval with lower and upper bounds, - * otherwise they will either trigger an error or be automatically clamped. - * @return : a structure, ZSTD_bounds, which contains - * - an error status field, which must be tested using ZSTD_isError() - * - both lower and upper bounds, inclusive - */ -ZSTDLIB_API ZSTD_bounds ZSTD_dParam_getBounds(ZSTD_dParameter dParam); - -/*! ZSTD_DCtx_setParameter() : - * Set one compression parameter, selected by enum ZSTD_dParameter. - * All parameters have valid bounds. Bounds can be queried using ZSTD_dParam_getBounds(). - * Providing a value beyond bound will either clamp it, or trigger an error (depending on parameter). - * Setting a parameter is only possible during frame initialization (before starting decompression). - * @return : 0, or an error code (which can be tested using ZSTD_isError()). - */ -ZSTDLIB_API size_t ZSTD_DCtx_setParameter(ZSTD_DCtx* dctx, ZSTD_dParameter param, int value); - -/*! ZSTD_DCtx_reset() : - * Return a DCtx to clean state. - * Session and parameters can be reset jointly or separately. - * Parameters can only be reset when no active frame is being decompressed. - * @return : 0, or an error code, which can be tested with ZSTD_isError() - */ -ZSTDLIB_API size_t ZSTD_DCtx_reset(ZSTD_DCtx* dctx, ZSTD_ResetDirective reset); - - -/**************************** -* Streaming -****************************/ - -typedef struct ZSTD_inBuffer_s { - const void* src; /**< start of input buffer */ - size_t size; /**< size of input buffer */ - size_t pos; /**< position where reading stopped. Will be updated. Necessarily 0 <= pos <= size */ -} ZSTD_inBuffer; - -typedef struct ZSTD_outBuffer_s { - void* dst; /**< start of output buffer */ - size_t size; /**< size of output buffer */ - size_t pos; /**< position where writing stopped. Will be updated. Necessarily 0 <= pos <= size */ -} ZSTD_outBuffer; - - - -/*-*********************************************************************** -* Streaming compression - HowTo -* -* A ZSTD_CStream object is required to track streaming operation. -* Use ZSTD_createCStream() and ZSTD_freeCStream() to create/release resources. -* ZSTD_CStream objects can be reused multiple times on consecutive compression operations. -* It is recommended to re-use ZSTD_CStream since it will play nicer with system's memory, by re-using already allocated memory. -* -* For parallel execution, use one separate ZSTD_CStream per thread. -* -* note : since v1.3.0, ZSTD_CStream and ZSTD_CCtx are the same thing. -* -* Parameters are sticky : when starting a new compression on the same context, -* it will re-use the same sticky parameters as previous compression session. -* When in doubt, it's recommended to fully initialize the context before usage. -* Use ZSTD_CCtx_reset() to reset the context and ZSTD_CCtx_setParameter(), -* ZSTD_CCtx_setPledgedSrcSize(), or ZSTD_CCtx_loadDictionary() and friends to -* set more specific parameters, the pledged source size, or load a dictionary. -* -* Use ZSTD_compressStream2() with ZSTD_e_continue as many times as necessary to -* consume input stream. The function will automatically update both `pos` -* fields within `input` and `output`. -* Note that the function may not consume the entire input, for example, because -* the output buffer is already full, in which case `input.pos < input.size`. -* The caller must check if input has been entirely consumed. -* If not, the caller must make some room to receive more compressed data, -* and then present again remaining input data. -* note: ZSTD_e_continue is guaranteed to make some forward progress when called, -* but doesn't guarantee maximal forward progress. This is especially relevant -* when compressing with multiple threads. The call won't block if it can -* consume some input, but if it can't it will wait for some, but not all, -* output to be flushed. -* @return : provides a minimum amount of data remaining to be flushed from internal buffers -* or an error code, which can be tested using ZSTD_isError(). -* -* At any moment, it's possible to flush whatever data might remain stuck within internal buffer, -* using ZSTD_compressStream2() with ZSTD_e_flush. `output->pos` will be updated. -* Note that, if `output->size` is too small, a single invocation with ZSTD_e_flush might not be enough (return code > 0). -* In which case, make some room to receive more compressed data, and call again ZSTD_compressStream2() with ZSTD_e_flush. -* You must continue calling ZSTD_compressStream2() with ZSTD_e_flush until it returns 0, at which point you can change the -* operation. -* note: ZSTD_e_flush will flush as much output as possible, meaning when compressing with multiple threads, it will -* block until the flush is complete or the output buffer is full. -* @return : 0 if internal buffers are entirely flushed, -* >0 if some data still present within internal buffer (the value is minimal estimation of remaining size), -* or an error code, which can be tested using ZSTD_isError(). -* -* Calling ZSTD_compressStream2() with ZSTD_e_end instructs to finish a frame. -* It will perform a flush and write frame epilogue. -* The epilogue is required for decoders to consider a frame completed. -* flush operation is the same, and follows same rules as calling ZSTD_compressStream2() with ZSTD_e_flush. -* You must continue calling ZSTD_compressStream2() with ZSTD_e_end until it returns 0, at which point you are free to -* start a new frame. -* note: ZSTD_e_end will flush as much output as possible, meaning when compressing with multiple threads, it will -* block until the flush is complete or the output buffer is full. -* @return : 0 if frame fully completed and fully flushed, -* >0 if some data still present within internal buffer (the value is minimal estimation of remaining size), -* or an error code, which can be tested using ZSTD_isError(). -* -* *******************************************************************/ - -typedef ZSTD_CCtx ZSTD_CStream; /**< CCtx and CStream are now effectively same object (>= v1.3.0) */ - /* Continue to distinguish them for compatibility with older versions <= v1.2.0 */ -/*===== ZSTD_CStream management functions =====*/ -ZSTDLIB_API ZSTD_CStream* ZSTD_createCStream(void); -ZSTDLIB_API size_t ZSTD_freeCStream(ZSTD_CStream* zcs); - -/*===== Streaming compression functions =====*/ -typedef enum { - ZSTD_e_continue=0, /* collect more data, encoder decides when to output compressed result, for optimal compression ratio */ - ZSTD_e_flush=1, /* flush any data provided so far, - * it creates (at least) one new block, that can be decoded immediately on reception; - * frame will continue: any future data can still reference previously compressed data, improving compression. - * note : multithreaded compression will block to flush as much output as possible. */ - ZSTD_e_end=2 /* flush any remaining data _and_ close current frame. - * note that frame is only closed after compressed data is fully flushed (return value == 0). - * After that point, any additional data starts a new frame. - * note : each frame is independent (does not reference any content from previous frame). - : note : multithreaded compression will block to flush as much output as possible. */ -} ZSTD_EndDirective; - -/*! ZSTD_compressStream2() : - * Behaves about the same as ZSTD_compressStream, with additional control on end directive. - * - Compression parameters are pushed into CCtx before starting compression, using ZSTD_CCtx_set*() - * - Compression parameters cannot be changed once compression is started (save a list of exceptions in multi-threading mode) - * - output->pos must be <= dstCapacity, input->pos must be <= srcSize - * - output->pos and input->pos will be updated. They are guaranteed to remain below their respective limit. - * - When nbWorkers==0 (default), function is blocking : it completes its job before returning to caller. - * - When nbWorkers>=1, function is non-blocking : it just acquires a copy of input, and distributes jobs to internal worker threads, flush whatever is available, - * and then immediately returns, just indicating that there is some data remaining to be flushed. - * The function nonetheless guarantees forward progress : it will return only after it reads or write at least 1+ byte. - * - Exception : if the first call requests a ZSTD_e_end directive and provides enough dstCapacity, the function delegates to ZSTD_compress2() which is always blocking. - * - @return provides a minimum amount of data remaining to be flushed from internal buffers - * or an error code, which can be tested using ZSTD_isError(). - * if @return != 0, flush is not fully completed, there is still some data left within internal buffers. - * This is useful for ZSTD_e_flush, since in this case more flushes are necessary to empty all buffers. - * For ZSTD_e_end, @return == 0 when internal buffers are fully flushed and frame is completed. - * - after a ZSTD_e_end directive, if internal buffer is not fully flushed (@return != 0), - * only ZSTD_e_end or ZSTD_e_flush operations are allowed. - * Before starting a new compression job, or changing compression parameters, - * it is required to fully flush internal buffers. - */ -ZSTDLIB_API size_t ZSTD_compressStream2( ZSTD_CCtx* cctx, - ZSTD_outBuffer* output, - ZSTD_inBuffer* input, - ZSTD_EndDirective endOp); - - -/* These buffer sizes are softly recommended. - * They are not required : ZSTD_compressStream*() happily accepts any buffer size, for both input and output. - * Respecting the recommended size just makes it a bit easier for ZSTD_compressStream*(), - * reducing the amount of memory shuffling and buffering, resulting in minor performance savings. - * - * However, note that these recommendations are from the perspective of a C caller program. - * If the streaming interface is invoked from some other language, - * especially managed ones such as Java or Go, through a foreign function interface such as jni or cgo, - * a major performance rule is to reduce crossing such interface to an absolute minimum. - * It's not rare that performance ends being spent more into the interface, rather than compression itself. - * In which cases, prefer using large buffers, as large as practical, - * for both input and output, to reduce the nb of roundtrips. - */ -ZSTDLIB_API size_t ZSTD_CStreamInSize(void); /**< recommended size for input buffer */ -ZSTDLIB_API size_t ZSTD_CStreamOutSize(void); /**< recommended size for output buffer. Guarantee to successfully flush at least one complete compressed block. */ - - -/* ***************************************************************************** - * This following is a legacy streaming API. - * It can be replaced by ZSTD_CCtx_reset() and ZSTD_compressStream2(). - * It is redundant, but remains fully supported. - * Advanced parameters and dictionary compression can only be used through the - * new API. - ******************************************************************************/ - -/*! - * Equivalent to: - * - * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); - * ZSTD_CCtx_refCDict(zcs, NULL); // clear the dictionary (if any) - * ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel); - */ -ZSTDLIB_API size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel); -/*! - * Alternative for ZSTD_compressStream2(zcs, output, input, ZSTD_e_continue). - * NOTE: The return value is different. ZSTD_compressStream() returns a hint for - * the next read size (if non-zero and not an error). ZSTD_compressStream2() - * returns the minimum nb of bytes left to flush (if non-zero and not an error). - */ -ZSTDLIB_API size_t ZSTD_compressStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input); -/*! Equivalent to ZSTD_compressStream2(zcs, output, &emptyInput, ZSTD_e_flush). */ -ZSTDLIB_API size_t ZSTD_flushStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output); -/*! Equivalent to ZSTD_compressStream2(zcs, output, &emptyInput, ZSTD_e_end). */ -ZSTDLIB_API size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output); - - -/*-*************************************************************************** -* Streaming decompression - HowTo -* -* A ZSTD_DStream object is required to track streaming operations. -* Use ZSTD_createDStream() and ZSTD_freeDStream() to create/release resources. -* ZSTD_DStream objects can be re-used multiple times. -* -* Use ZSTD_initDStream() to start a new decompression operation. -* @return : recommended first input size -* Alternatively, use advanced API to set specific properties. -* -* Use ZSTD_decompressStream() repetitively to consume your input. -* The function will update both `pos` fields. -* If `input.pos < input.size`, some input has not been consumed. -* It's up to the caller to present again remaining data. -* The function tries to flush all data decoded immediately, respecting output buffer size. -* If `output.pos < output.size`, decoder has flushed everything it could. -* But if `output.pos == output.size`, there might be some data left within internal buffers., -* In which case, call ZSTD_decompressStream() again to flush whatever remains in the buffer. -* Note : with no additional input provided, amount of data flushed is necessarily <= ZSTD_BLOCKSIZE_MAX. -* @return : 0 when a frame is completely decoded and fully flushed, -* or an error code, which can be tested using ZSTD_isError(), -* or any other value > 0, which means there is still some decoding or flushing to do to complete current frame : -* the return value is a suggested next input size (just a hint for better latency) -* that will never request more than the remaining frame size. -* *******************************************************************************/ - -typedef ZSTD_DCtx ZSTD_DStream; /**< DCtx and DStream are now effectively same object (>= v1.3.0) */ - /* For compatibility with versions <= v1.2.0, prefer differentiating them. */ -/*===== ZSTD_DStream management functions =====*/ -ZSTDLIB_API ZSTD_DStream* ZSTD_createDStream(void); -ZSTDLIB_API size_t ZSTD_freeDStream(ZSTD_DStream* zds); - -/*===== Streaming decompression functions =====*/ - -/* This function is redundant with the advanced API and equivalent to: - * - * ZSTD_DCtx_reset(zds, ZSTD_reset_session_only); - * ZSTD_DCtx_refDDict(zds, NULL); - */ -ZSTDLIB_API size_t ZSTD_initDStream(ZSTD_DStream* zds); - -ZSTDLIB_API size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inBuffer* input); - -ZSTDLIB_API size_t ZSTD_DStreamInSize(void); /*!< recommended size for input buffer */ -ZSTDLIB_API size_t ZSTD_DStreamOutSize(void); /*!< recommended size for output buffer. Guarantee to successfully flush at least one complete block in all circumstances. */ - - -/************************** -* Simple dictionary API -***************************/ -/*! ZSTD_compress_usingDict() : - * Compression at an explicit compression level using a Dictionary. - * A dictionary can be any arbitrary data segment (also called a prefix), - * or a buffer with specified information (see dict/zdict.h). - * Note : This function loads the dictionary, resulting in significant startup delay. - * It's intended for a dictionary used only once. - * Note 2 : When `dict == NULL || dictSize < 8` no dictionary is used. */ -ZSTDLIB_API size_t ZSTD_compress_usingDict(ZSTD_CCtx* ctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - const void* dict,size_t dictSize, - int compressionLevel); - -/*! ZSTD_decompress_usingDict() : - * Decompression using a known Dictionary. - * Dictionary must be identical to the one used during compression. - * Note : This function loads the dictionary, resulting in significant startup delay. - * It's intended for a dictionary used only once. - * Note : When `dict == NULL || dictSize < 8` no dictionary is used. */ -ZSTDLIB_API size_t ZSTD_decompress_usingDict(ZSTD_DCtx* dctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - const void* dict,size_t dictSize); - - -/*********************************** - * Bulk processing dictionary API - **********************************/ -typedef struct ZSTD_CDict_s ZSTD_CDict; - -/*! ZSTD_createCDict() : - * When compressing multiple messages or blocks using the same dictionary, - * it's recommended to digest the dictionary only once, since it's a costly operation. - * ZSTD_createCDict() will create a state from digesting a dictionary. - * The resulting state can be used for future compression operations with very limited startup cost. - * ZSTD_CDict can be created once and shared by multiple threads concurrently, since its usage is read-only. - * @dictBuffer can be released after ZSTD_CDict creation, because its content is copied within CDict. - * Note 1 : Consider experimental function `ZSTD_createCDict_byReference()` if you prefer to not duplicate @dictBuffer content. - * Note 2 : A ZSTD_CDict can be created from an empty @dictBuffer, - * in which case the only thing that it transports is the @compressionLevel. - * This can be useful in a pipeline featuring ZSTD_compress_usingCDict() exclusively, - * expecting a ZSTD_CDict parameter with any data, including those without a known dictionary. */ -ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict(const void* dictBuffer, size_t dictSize, - int compressionLevel); - -/*! ZSTD_freeCDict() : - * Function frees memory allocated by ZSTD_createCDict(). */ -ZSTDLIB_API size_t ZSTD_freeCDict(ZSTD_CDict* CDict); - -/*! ZSTD_compress_usingCDict() : - * Compression using a digested Dictionary. - * Recommended when same dictionary is used multiple times. - * Note : compression level is _decided at dictionary creation time_, - * and frame parameters are hardcoded (dictID=yes, contentSize=yes, checksum=no) */ -ZSTDLIB_API size_t ZSTD_compress_usingCDict(ZSTD_CCtx* cctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - const ZSTD_CDict* cdict); - - -typedef struct ZSTD_DDict_s ZSTD_DDict; - -/*! ZSTD_createDDict() : - * Create a digested dictionary, ready to start decompression operation without startup delay. - * dictBuffer can be released after DDict creation, as its content is copied inside DDict. */ -ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict(const void* dictBuffer, size_t dictSize); - -/*! ZSTD_freeDDict() : - * Function frees memory allocated with ZSTD_createDDict() */ -ZSTDLIB_API size_t ZSTD_freeDDict(ZSTD_DDict* ddict); - -/*! ZSTD_decompress_usingDDict() : - * Decompression using a digested Dictionary. - * Recommended when same dictionary is used multiple times. */ -ZSTDLIB_API size_t ZSTD_decompress_usingDDict(ZSTD_DCtx* dctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - const ZSTD_DDict* ddict); - - -/******************************** - * Dictionary helper functions - *******************************/ - -/*! ZSTD_getDictID_fromDict() : - * Provides the dictID stored within dictionary. - * if @return == 0, the dictionary is not conformant with Zstandard specification. - * It can still be loaded, but as a content-only dictionary. */ -ZSTDLIB_API unsigned ZSTD_getDictID_fromDict(const void* dict, size_t dictSize); - -/*! ZSTD_getDictID_fromDDict() : - * Provides the dictID of the dictionary loaded into `ddict`. - * If @return == 0, the dictionary is not conformant to Zstandard specification, or empty. - * Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */ -ZSTDLIB_API unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict); - -/*! ZSTD_getDictID_fromFrame() : - * Provides the dictID required to decompressed the frame stored within `src`. - * If @return == 0, the dictID could not be decoded. - * This could for one of the following reasons : - * - The frame does not require a dictionary to be decoded (most common case). - * - The frame was built with dictID intentionally removed. Whatever dictionary is necessary is a hidden information. - * Note : this use case also happens when using a non-conformant dictionary. - * - `srcSize` is too small, and as a result, the frame header could not be decoded (only possible if `srcSize < ZSTD_FRAMEHEADERSIZE_MAX`). - * - This is not a Zstandard frame. - * When identifying the exact failure cause, it's possible to use ZSTD_getFrameHeader(), which will provide a more precise error code. */ -ZSTDLIB_API unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize); - - -/******************************************************************************* - * Advanced dictionary and prefix API - * - * This API allows dictionaries to be used with ZSTD_compress2(), - * ZSTD_compressStream2(), and ZSTD_decompress(). Dictionaries are sticky, and - * only reset with the context is reset with ZSTD_reset_parameters or - * ZSTD_reset_session_and_parameters. Prefixes are single-use. - ******************************************************************************/ - - -/*! ZSTD_CCtx_loadDictionary() : - * Create an internal CDict from `dict` buffer. - * Decompression will have to use same dictionary. - * @result : 0, or an error code (which can be tested with ZSTD_isError()). - * Special: Loading a NULL (or 0-size) dictionary invalidates previous dictionary, - * meaning "return to no-dictionary mode". - * Note 1 : Dictionary is sticky, it will be used for all future compressed frames. - * To return to "no-dictionary" situation, load a NULL dictionary (or reset parameters). - * Note 2 : Loading a dictionary involves building tables. - * It's also a CPU consuming operation, with non-negligible impact on latency. - * Tables are dependent on compression parameters, and for this reason, - * compression parameters can no longer be changed after loading a dictionary. - * Note 3 :`dict` content will be copied internally. - * Use experimental ZSTD_CCtx_loadDictionary_byReference() to reference content instead. - * In such a case, dictionary buffer must outlive its users. - * Note 4 : Use ZSTD_CCtx_loadDictionary_advanced() - * to precisely select how dictionary content must be interpreted. */ -ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary(ZSTD_CCtx* cctx, const void* dict, size_t dictSize); - -/*! ZSTD_CCtx_refCDict() : - * Reference a prepared dictionary, to be used for all next compressed frames. - * Note that compression parameters are enforced from within CDict, - * and supersede any compression parameter previously set within CCtx. - * The parameters ignored are labled as "superseded-by-cdict" in the ZSTD_cParameter enum docs. - * The ignored parameters will be used again if the CCtx is returned to no-dictionary mode. - * The dictionary will remain valid for future compressed frames using same CCtx. - * @result : 0, or an error code (which can be tested with ZSTD_isError()). - * Special : Referencing a NULL CDict means "return to no-dictionary mode". - * Note 1 : Currently, only one dictionary can be managed. - * Referencing a new dictionary effectively "discards" any previous one. - * Note 2 : CDict is just referenced, its lifetime must outlive its usage within CCtx. */ -ZSTDLIB_API size_t ZSTD_CCtx_refCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict); - -/*! ZSTD_CCtx_refPrefix() : - * Reference a prefix (single-usage dictionary) for next compressed frame. - * A prefix is **only used once**. Tables are discarded at end of frame (ZSTD_e_end). - * Decompression will need same prefix to properly regenerate data. - * Compressing with a prefix is similar in outcome as performing a diff and compressing it, - * but performs much faster, especially during decompression (compression speed is tunable with compression level). - * @result : 0, or an error code (which can be tested with ZSTD_isError()). - * Special: Adding any prefix (including NULL) invalidates any previous prefix or dictionary - * Note 1 : Prefix buffer is referenced. It **must** outlive compression. - * Its content must remain unmodified during compression. - * Note 2 : If the intention is to diff some large src data blob with some prior version of itself, - * ensure that the window size is large enough to contain the entire source. - * See ZSTD_c_windowLog. - * Note 3 : Referencing a prefix involves building tables, which are dependent on compression parameters. - * It's a CPU consuming operation, with non-negligible impact on latency. - * If there is a need to use the same prefix multiple times, consider loadDictionary instead. - * Note 4 : By default, the prefix is interpreted as raw content (ZSTD_dct_rawContent). - * Use experimental ZSTD_CCtx_refPrefix_advanced() to alter dictionary interpretation. */ -ZSTDLIB_API size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx, - const void* prefix, size_t prefixSize); - -/*! ZSTD_DCtx_loadDictionary() : - * Create an internal DDict from dict buffer, - * to be used to decompress next frames. - * The dictionary remains valid for all future frames, until explicitly invalidated. - * @result : 0, or an error code (which can be tested with ZSTD_isError()). - * Special : Adding a NULL (or 0-size) dictionary invalidates any previous dictionary, - * meaning "return to no-dictionary mode". - * Note 1 : Loading a dictionary involves building tables, - * which has a non-negligible impact on CPU usage and latency. - * It's recommended to "load once, use many times", to amortize the cost - * Note 2 :`dict` content will be copied internally, so `dict` can be released after loading. - * Use ZSTD_DCtx_loadDictionary_byReference() to reference dictionary content instead. - * Note 3 : Use ZSTD_DCtx_loadDictionary_advanced() to take control of - * how dictionary content is loaded and interpreted. - */ -ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary(ZSTD_DCtx* dctx, const void* dict, size_t dictSize); - -/*! ZSTD_DCtx_refDDict() : - * Reference a prepared dictionary, to be used to decompress next frames. - * The dictionary remains active for decompression of future frames using same DCtx. - * @result : 0, or an error code (which can be tested with ZSTD_isError()). - * Note 1 : Currently, only one dictionary can be managed. - * Referencing a new dictionary effectively "discards" any previous one. - * Special: referencing a NULL DDict means "return to no-dictionary mode". - * Note 2 : DDict is just referenced, its lifetime must outlive its usage from DCtx. - */ -ZSTDLIB_API size_t ZSTD_DCtx_refDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict); - -/*! ZSTD_DCtx_refPrefix() : - * Reference a prefix (single-usage dictionary) to decompress next frame. - * This is the reverse operation of ZSTD_CCtx_refPrefix(), - * and must use the same prefix as the one used during compression. - * Prefix is **only used once**. Reference is discarded at end of frame. - * End of frame is reached when ZSTD_decompressStream() returns 0. - * @result : 0, or an error code (which can be tested with ZSTD_isError()). - * Note 1 : Adding any prefix (including NULL) invalidates any previously set prefix or dictionary - * Note 2 : Prefix buffer is referenced. It **must** outlive decompression. - * Prefix buffer must remain unmodified up to the end of frame, - * reached when ZSTD_decompressStream() returns 0. - * Note 3 : By default, the prefix is treated as raw content (ZSTD_dct_rawContent). - * Use ZSTD_CCtx_refPrefix_advanced() to alter dictMode (Experimental section) - * Note 4 : Referencing a raw content prefix has almost no cpu nor memory cost. - * A full dictionary is more costly, as it requires building tables. - */ -ZSTDLIB_API size_t ZSTD_DCtx_refPrefix(ZSTD_DCtx* dctx, - const void* prefix, size_t prefixSize); - -/* === Memory management === */ - -/*! ZSTD_sizeof_*() : - * These functions give the _current_ memory usage of selected object. - * Note that object memory usage can evolve (increase or decrease) over time. */ -ZSTDLIB_API size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx); -ZSTDLIB_API size_t ZSTD_sizeof_DCtx(const ZSTD_DCtx* dctx); -ZSTDLIB_API size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs); -ZSTDLIB_API size_t ZSTD_sizeof_DStream(const ZSTD_DStream* zds); -ZSTDLIB_API size_t ZSTD_sizeof_CDict(const ZSTD_CDict* cdict); -ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict); - -} -#endif /* ZSTD_H_235446 */ - - -// LICENSE_CHANGE_END - - - -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #4 -// See the end of this file for a list - - -/* ************************************************************************************** - * ADVANCED AND EXPERIMENTAL FUNCTIONS - **************************************************************************************** - * The definitions in the following section are considered experimental. - * They are provided for advanced scenarios. - * They should never be used with a dynamic library, as prototypes may change in the future. - * Use them only in association with static linking. - * ***************************************************************************************/ - -#ifndef ZSTD_H_ZSTD_STATIC_LINKING_ONLY -#define ZSTD_H_ZSTD_STATIC_LINKING_ONLY - -namespace duckdb_zstd { - -/**************************************************************************************** - * experimental API (static linking only) - **************************************************************************************** - * The following symbols and constants - * are not planned to join "stable API" status in the near future. - * They can still change in future versions. - * Some of them are planned to remain in the static_only section indefinitely. - * Some of them might be removed in the future (especially when redundant with existing stable functions) - * ***************************************************************************************/ - -#define ZSTD_FRAMEHEADERSIZE_PREFIX(format) ((format) == ZSTD_f_zstd1 ? 5 : 1) /* minimum input size required to query frame header size */ -#define ZSTD_FRAMEHEADERSIZE_MIN(format) ((format) == ZSTD_f_zstd1 ? 6 : 2) -#define ZSTD_FRAMEHEADERSIZE_MAX 18 /* can be useful for static allocation */ -#define ZSTD_SKIPPABLEHEADERSIZE 8 - -/* compression parameter bounds */ -#define ZSTD_WINDOWLOG_MAX_32 30 -#define ZSTD_WINDOWLOG_MAX_64 31 -#define ZSTD_WINDOWLOG_MAX ((int)(sizeof(size_t) == 4 ? ZSTD_WINDOWLOG_MAX_32 : ZSTD_WINDOWLOG_MAX_64)) -#define ZSTD_WINDOWLOG_MIN 10 -#define ZSTD_HASHLOG_MAX ((ZSTD_WINDOWLOG_MAX < 30) ? ZSTD_WINDOWLOG_MAX : 30) -#define ZSTD_HASHLOG_MIN 6 -#define ZSTD_CHAINLOG_MAX_32 29 -#define ZSTD_CHAINLOG_MAX_64 30 -#define ZSTD_CHAINLOG_MAX ((int)(sizeof(size_t) == 4 ? ZSTD_CHAINLOG_MAX_32 : ZSTD_CHAINLOG_MAX_64)) -#define ZSTD_CHAINLOG_MIN ZSTD_HASHLOG_MIN -#define ZSTD_SEARCHLOG_MAX (ZSTD_WINDOWLOG_MAX-1) -#define ZSTD_SEARCHLOG_MIN 1 -#define ZSTD_MINMATCH_MAX 7 /* only for ZSTD_fast, other strategies are limited to 6 */ -#define ZSTD_MINMATCH_MIN 3 /* only for ZSTD_btopt+, faster strategies are limited to 4 */ -#define ZSTD_TARGETLENGTH_MAX ZSTD_BLOCKSIZE_MAX -#define ZSTD_TARGETLENGTH_MIN 0 /* note : comparing this constant to an unsigned results in a tautological test */ -#define ZSTD_STRATEGY_MIN ZSTD_fast -#define ZSTD_STRATEGY_MAX ZSTD_btultra2 - - -#define ZSTD_OVERLAPLOG_MIN 0 -#define ZSTD_OVERLAPLOG_MAX 9 - -#define ZSTD_WINDOWLOG_LIMIT_DEFAULT 27 /* by default, the streaming decoder will refuse any frame - * requiring larger than (1< 3, then this is seqDef.offset - 3 - * If seqDef.offset < 3, then this is the corresponding repeat offset - * But if seqDef.offset < 3 and litLength == 0, this is the - * repeat offset before the corresponding repeat offset - * And if seqDef.offset == 3 and litLength == 0, this is the - * most recent repeat offset - 1 - */ - unsigned int offset; - unsigned int litLength; /* Literal length */ - unsigned int matchLength; /* Match length */ - /* 0 when seq not rep and seqDef.offset otherwise - * when litLength == 0 this will be <= 4, otherwise <= 3 like normal - */ - unsigned int rep; -} ZSTD_Sequence; - -typedef struct { - unsigned windowLog; /**< largest match distance : larger == more compression, more memory needed during decompression */ - unsigned chainLog; /**< fully searched segment : larger == more compression, slower, more memory (useless for fast) */ - unsigned hashLog; /**< dispatch table : larger == faster, more memory */ - unsigned searchLog; /**< nb of searches : larger == more compression, slower */ - unsigned minMatch; /**< match length searched : larger == faster decompression, sometimes less compression */ - unsigned targetLength; /**< acceptable match size for optimal parser (only) : larger == more compression, slower */ - ZSTD_strategy strategy; /**< see ZSTD_strategy definition above */ -} ZSTD_compressionParameters; - -typedef struct { - int contentSizeFlag; /**< 1: content size will be in frame header (when known) */ - int checksumFlag; /**< 1: generate a 32-bits checksum using XXH64 algorithm at end of frame, for error detection */ - int noDictIDFlag; /**< 1: no dictID will be saved into frame header (dictID is only useful for dictionary compression) */ -} ZSTD_frameParameters; - -typedef struct { - ZSTD_compressionParameters cParams; - ZSTD_frameParameters fParams; -} ZSTD_parameters; - -typedef enum { - ZSTD_dct_auto = 0, /* dictionary is "full" when starting with ZSTD_MAGIC_DICTIONARY, otherwise it is "rawContent" */ - ZSTD_dct_rawContent = 1, /* ensures dictionary is always loaded as rawContent, even if it starts with ZSTD_MAGIC_DICTIONARY */ - ZSTD_dct_fullDict = 2 /* refuses to load a dictionary if it does not respect Zstandard's specification, starting with ZSTD_MAGIC_DICTIONARY */ -} ZSTD_dictContentType_e; - -typedef enum { - ZSTD_dlm_byCopy = 0, /**< Copy dictionary content internally */ - ZSTD_dlm_byRef = 1 /**< Reference dictionary content -- the dictionary buffer must outlive its users. */ -} ZSTD_dictLoadMethod_e; - -typedef enum { - ZSTD_f_zstd1 = 0, /* zstd frame format, specified in zstd_compression_format.md (default) */ - ZSTD_f_zstd1_magicless = 1 /* Variant of zstd frame format, without initial 4-bytes magic number. - * Useful to save 4 bytes per generated frame. - * Decoder cannot recognise automatically this format, requiring this instruction. */ -} ZSTD_format_e; - -typedef enum { - /* Note: this enum and the behavior it controls are effectively internal - * implementation details of the compressor. They are expected to continue - * to evolve and should be considered only in the context of extremely - * advanced performance tuning. - * - * Zstd currently supports the use of a CDict in three ways: - * - * - The contents of the CDict can be copied into the working context. This - * means that the compression can search both the dictionary and input - * while operating on a single set of internal tables. This makes - * the compression faster per-byte of input. However, the initial copy of - * the CDict's tables incurs a fixed cost at the beginning of the - * compression. For small compressions (< 8 KB), that copy can dominate - * the cost of the compression. - * - * - The CDict's tables can be used in-place. In this model, compression is - * slower per input byte, because the compressor has to search two sets of - * tables. However, this model incurs no start-up cost (as long as the - * working context's tables can be reused). For small inputs, this can be - * faster than copying the CDict's tables. - * - * - The CDict's tables are not used at all, and instead we use the working - * context alone to reload the dictionary and use params based on the source - * size. See ZSTD_compress_insertDictionary() and ZSTD_compress_usingDict(). - * This method is effective when the dictionary sizes are very small relative - * to the input size, and the input size is fairly large to begin with. - * - * Zstd has a simple internal heuristic that selects which strategy to use - * at the beginning of a compression. However, if experimentation shows that - * Zstd is making poor choices, it is possible to override that choice with - * this enum. - */ - ZSTD_dictDefaultAttach = 0, /* Use the default heuristic. */ - ZSTD_dictForceAttach = 1, /* Never copy the dictionary. */ - ZSTD_dictForceCopy = 2, /* Always copy the dictionary. */ - ZSTD_dictForceLoad = 3 /* Always reload the dictionary */ -} ZSTD_dictAttachPref_e; - -typedef enum { - ZSTD_lcm_auto = 0, /**< Automatically determine the compression mode based on the compression level. - * Negative compression levels will be uncompressed, and positive compression - * levels will be compressed. */ - ZSTD_lcm_huffman = 1, /**< Always attempt Huffman compression. Uncompressed literals will still be - * emitted if Huffman compression is not profitable. */ - ZSTD_lcm_uncompressed = 2 /**< Always emit uncompressed literals. */ -} ZSTD_literalCompressionMode_e; - - -/*************************************** -* Frame size functions -***************************************/ - -/*! ZSTD_findDecompressedSize() : - * `src` should point to the start of a series of ZSTD encoded and/or skippable frames - * `srcSize` must be the _exact_ size of this series - * (i.e. there should be a frame boundary at `src + srcSize`) - * @return : - decompressed size of all data in all successive frames - * - if the decompressed size cannot be determined: ZSTD_CONTENTSIZE_UNKNOWN - * - if an error occurred: ZSTD_CONTENTSIZE_ERROR - * - * note 1 : decompressed size is an optional field, that may not be present, especially in streaming mode. - * When `return==ZSTD_CONTENTSIZE_UNKNOWN`, data to decompress could be any size. - * In which case, it's necessary to use streaming mode to decompress data. - * note 2 : decompressed size is always present when compression is done with ZSTD_compress() - * note 3 : decompressed size can be very large (64-bits value), - * potentially larger than what local system can handle as a single memory segment. - * In which case, it's necessary to use streaming mode to decompress data. - * note 4 : If source is untrusted, decompressed size could be wrong or intentionally modified. - * Always ensure result fits within application's authorized limits. - * Each application can set its own limits. - * note 5 : ZSTD_findDecompressedSize handles multiple frames, and so it must traverse the input to - * read each contained frame header. This is fast as most of the data is skipped, - * however it does mean that all frame data must be present and valid. */ -ZSTDLIB_API unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize); - -/*! ZSTD_decompressBound() : - * `src` should point to the start of a series of ZSTD encoded and/or skippable frames - * `srcSize` must be the _exact_ size of this series - * (i.e. there should be a frame boundary at `src + srcSize`) - * @return : - upper-bound for the decompressed size of all data in all successive frames - * - if an error occured: ZSTD_CONTENTSIZE_ERROR - * - * note 1 : an error can occur if `src` contains an invalid or incorrectly formatted frame. - * note 2 : the upper-bound is exact when the decompressed size field is available in every ZSTD encoded frame of `src`. - * in this case, `ZSTD_findDecompressedSize` and `ZSTD_decompressBound` return the same value. - * note 3 : when the decompressed size field isn't available, the upper-bound for that frame is calculated by: - * upper-bound = # blocks * min(128 KB, Window_Size) - */ -ZSTDLIB_API unsigned long long ZSTD_decompressBound(const void* src, size_t srcSize); - -/*! ZSTD_frameHeaderSize() : - * srcSize must be >= ZSTD_FRAMEHEADERSIZE_PREFIX. - * @return : size of the Frame Header, - * or an error code (if srcSize is too small) */ -ZSTDLIB_API size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize); - -/*! ZSTD_getSequences() : - * Extract sequences from the sequence store - * zc can be used to insert custom compression params. - * This function invokes ZSTD_compress2 - * @return : number of sequences extracted - */ -ZSTDLIB_API size_t ZSTD_getSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs, - size_t outSeqsSize, const void* src, size_t srcSize); - - -/*************************************** -* Memory management -***************************************/ - -/*! ZSTD_estimate*() : - * These functions make it possible to estimate memory usage - * of a future {D,C}Ctx, before its creation. - * - * ZSTD_estimateCCtxSize() will provide a memory budget large enough - * for any compression level up to selected one. - * Note : Unlike ZSTD_estimateCStreamSize*(), this estimate - * does not include space for a window buffer. - * Therefore, the estimation is only guaranteed for single-shot compressions, not streaming. - * The estimate will assume the input may be arbitrarily large, - * which is the worst case. - * - * When srcSize can be bound by a known and rather "small" value, - * this fact can be used to provide a tighter estimation - * because the CCtx compression context will need less memory. - * This tighter estimation can be provided by more advanced functions - * ZSTD_estimateCCtxSize_usingCParams(), which can be used in tandem with ZSTD_getCParams(), - * and ZSTD_estimateCCtxSize_usingCCtxParams(), which can be used in tandem with ZSTD_CCtxParams_setParameter(). - * Both can be used to estimate memory using custom compression parameters and arbitrary srcSize limits. - * - * Note 2 : only single-threaded compression is supported. - * ZSTD_estimateCCtxSize_usingCCtxParams() will return an error code if ZSTD_c_nbWorkers is >= 1. - */ -ZSTDLIB_API size_t ZSTD_estimateCCtxSize(int compressionLevel); -ZSTDLIB_API size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams); -ZSTDLIB_API size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params); -ZSTDLIB_API size_t ZSTD_estimateDCtxSize(void); - -/*! ZSTD_estimateCStreamSize() : - * ZSTD_estimateCStreamSize() will provide a budget large enough for any compression level up to selected one. - * It will also consider src size to be arbitrarily "large", which is worst case. - * If srcSize is known to always be small, ZSTD_estimateCStreamSize_usingCParams() can provide a tighter estimation. - * ZSTD_estimateCStreamSize_usingCParams() can be used in tandem with ZSTD_getCParams() to create cParams from compressionLevel. - * ZSTD_estimateCStreamSize_usingCCtxParams() can be used in tandem with ZSTD_CCtxParams_setParameter(). Only single-threaded compression is supported. This function will return an error code if ZSTD_c_nbWorkers is >= 1. - * Note : CStream size estimation is only correct for single-threaded compression. - * ZSTD_DStream memory budget depends on window Size. - * This information can be passed manually, using ZSTD_estimateDStreamSize, - * or deducted from a valid frame Header, using ZSTD_estimateDStreamSize_fromFrame(); - * Note : if streaming is init with function ZSTD_init?Stream_usingDict(), - * an internal ?Dict will be created, which additional size is not estimated here. - * In this case, get total size by adding ZSTD_estimate?DictSize */ -ZSTDLIB_API size_t ZSTD_estimateCStreamSize(int compressionLevel); -ZSTDLIB_API size_t ZSTD_estimateCStreamSize_usingCParams(ZSTD_compressionParameters cParams); -ZSTDLIB_API size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params* params); -ZSTDLIB_API size_t ZSTD_estimateDStreamSize(size_t windowSize); -ZSTDLIB_API size_t ZSTD_estimateDStreamSize_fromFrame(const void* src, size_t srcSize); - -/*! ZSTD_estimate?DictSize() : - * ZSTD_estimateCDictSize() will bet that src size is relatively "small", and content is copied, like ZSTD_createCDict(). - * ZSTD_estimateCDictSize_advanced() makes it possible to control compression parameters precisely, like ZSTD_createCDict_advanced(). - * Note : dictionaries created by reference (`ZSTD_dlm_byRef`) are logically smaller. - */ -ZSTDLIB_API size_t ZSTD_estimateCDictSize(size_t dictSize, int compressionLevel); -ZSTDLIB_API size_t ZSTD_estimateCDictSize_advanced(size_t dictSize, ZSTD_compressionParameters cParams, ZSTD_dictLoadMethod_e dictLoadMethod); -ZSTDLIB_API size_t ZSTD_estimateDDictSize(size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod); - -/*! ZSTD_initStatic*() : - * Initialize an object using a pre-allocated fixed-size buffer. - * workspace: The memory area to emplace the object into. - * Provided pointer *must be 8-bytes aligned*. - * Buffer must outlive object. - * workspaceSize: Use ZSTD_estimate*Size() to determine - * how large workspace must be to support target scenario. - * @return : pointer to object (same address as workspace, just different type), - * or NULL if error (size too small, incorrect alignment, etc.) - * Note : zstd will never resize nor malloc() when using a static buffer. - * If the object requires more memory than available, - * zstd will just error out (typically ZSTD_error_memory_allocation). - * Note 2 : there is no corresponding "free" function. - * Since workspace is allocated externally, it must be freed externally too. - * Note 3 : cParams : use ZSTD_getCParams() to convert a compression level - * into its associated cParams. - * Limitation 1 : currently not compatible with internal dictionary creation, triggered by - * ZSTD_CCtx_loadDictionary(), ZSTD_initCStream_usingDict() or ZSTD_initDStream_usingDict(). - * Limitation 2 : static cctx currently not compatible with multi-threading. - * Limitation 3 : static dctx is incompatible with legacy support. - */ -ZSTDLIB_API ZSTD_CCtx* ZSTD_initStaticCCtx(void* workspace, size_t workspaceSize); -ZSTDLIB_API ZSTD_CStream* ZSTD_initStaticCStream(void* workspace, size_t workspaceSize); /**< same as ZSTD_initStaticCCtx() */ - -ZSTDLIB_API ZSTD_DCtx* ZSTD_initStaticDCtx(void* workspace, size_t workspaceSize); -ZSTDLIB_API ZSTD_DStream* ZSTD_initStaticDStream(void* workspace, size_t workspaceSize); /**< same as ZSTD_initStaticDCtx() */ - -ZSTDLIB_API const ZSTD_CDict* ZSTD_initStaticCDict( - void* workspace, size_t workspaceSize, - const void* dict, size_t dictSize, - ZSTD_dictLoadMethod_e dictLoadMethod, - ZSTD_dictContentType_e dictContentType, - ZSTD_compressionParameters cParams); - -ZSTDLIB_API const ZSTD_DDict* ZSTD_initStaticDDict( - void* workspace, size_t workspaceSize, - const void* dict, size_t dictSize, - ZSTD_dictLoadMethod_e dictLoadMethod, - ZSTD_dictContentType_e dictContentType); - - -/*! Custom memory allocation : - * These prototypes make it possible to pass your own allocation/free functions. - * ZSTD_customMem is provided at creation time, using ZSTD_create*_advanced() variants listed below. - * All allocation/free operations will be completed using these custom variants instead of regular ones. - */ -typedef void* (*ZSTD_allocFunction) (void* opaque, size_t size); -typedef void (*ZSTD_freeFunction) (void* opaque, void* address); -typedef struct { ZSTD_allocFunction customAlloc; ZSTD_freeFunction customFree; void* opaque; } ZSTD_customMem; - -ZSTDLIB_API ZSTD_CCtx* ZSTD_createCCtx_advanced(ZSTD_customMem customMem); -ZSTDLIB_API ZSTD_CStream* ZSTD_createCStream_advanced(ZSTD_customMem customMem); -ZSTDLIB_API ZSTD_DCtx* ZSTD_createDCtx_advanced(ZSTD_customMem customMem); -ZSTDLIB_API ZSTD_DStream* ZSTD_createDStream_advanced(ZSTD_customMem customMem); - -ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced(const void* dict, size_t dictSize, - ZSTD_dictLoadMethod_e dictLoadMethod, - ZSTD_dictContentType_e dictContentType, - ZSTD_compressionParameters cParams, - ZSTD_customMem customMem); - -ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize, - ZSTD_dictLoadMethod_e dictLoadMethod, - ZSTD_dictContentType_e dictContentType, - ZSTD_customMem customMem); - - - -/*************************************** -* Advanced compression functions -***************************************/ - -/*! ZSTD_createCDict_byReference() : - * Create a digested dictionary for compression - * Dictionary content is just referenced, not duplicated. - * As a consequence, `dictBuffer` **must** outlive CDict, - * and its content must remain unmodified throughout the lifetime of CDict. - * note: equivalent to ZSTD_createCDict_advanced(), with dictLoadMethod==ZSTD_dlm_byRef */ -ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_byReference(const void* dictBuffer, size_t dictSize, int compressionLevel); - -/*! ZSTD_getCParams() : - * @return ZSTD_compressionParameters structure for a selected compression level and estimated srcSize. - * `estimatedSrcSize` value is optional, select 0 if not known */ -ZSTDLIB_API ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long long estimatedSrcSize, size_t dictSize); - -/*! ZSTD_getParams() : - * same as ZSTD_getCParams(), but @return a full `ZSTD_parameters` object instead of sub-component `ZSTD_compressionParameters`. - * All fields of `ZSTD_frameParameters` are set to default : contentSize=1, checksum=0, noDictID=0 */ -ZSTDLIB_API ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long estimatedSrcSize, size_t dictSize); - -/*! ZSTD_checkCParams() : - * Ensure param values remain within authorized range. - * @return 0 on success, or an error code (can be checked with ZSTD_isError()) */ -ZSTDLIB_API size_t ZSTD_checkCParams(ZSTD_compressionParameters params); - -/*! ZSTD_adjustCParams() : - * optimize params for a given `srcSize` and `dictSize`. - * `srcSize` can be unknown, in which case use ZSTD_CONTENTSIZE_UNKNOWN. - * `dictSize` must be `0` when there is no dictionary. - * cPar can be invalid : all parameters will be clamped within valid range in the @return struct. - * This function never fails (wide contract) */ -ZSTDLIB_API ZSTD_compressionParameters ZSTD_adjustCParams(ZSTD_compressionParameters cPar, unsigned long long srcSize, size_t dictSize); - -/*! ZSTD_compress_advanced() : - * Note : this function is now DEPRECATED. - * It can be replaced by ZSTD_compress2(), in combination with ZSTD_CCtx_setParameter() and other parameter setters. - * This prototype will be marked as deprecated and generate compilation warning on reaching v1.5.x */ -ZSTDLIB_API size_t ZSTD_compress_advanced(ZSTD_CCtx* cctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - const void* dict,size_t dictSize, - ZSTD_parameters params); - -/*! ZSTD_compress_usingCDict_advanced() : - * Note : this function is now REDUNDANT. - * It can be replaced by ZSTD_compress2(), in combination with ZSTD_CCtx_loadDictionary() and other parameter setters. - * This prototype will be marked as deprecated and generate compilation warning in some future version */ -ZSTDLIB_API size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - const ZSTD_CDict* cdict, - ZSTD_frameParameters fParams); - - -/*! ZSTD_CCtx_loadDictionary_byReference() : - * Same as ZSTD_CCtx_loadDictionary(), but dictionary content is referenced, instead of being copied into CCtx. - * It saves some memory, but also requires that `dict` outlives its usage within `cctx` */ -ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary_byReference(ZSTD_CCtx* cctx, const void* dict, size_t dictSize); - -/*! ZSTD_CCtx_loadDictionary_advanced() : - * Same as ZSTD_CCtx_loadDictionary(), but gives finer control over - * how to load the dictionary (by copy ? by reference ?) - * and how to interpret it (automatic ? force raw mode ? full mode only ?) */ -ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType); - -/*! ZSTD_CCtx_refPrefix_advanced() : - * Same as ZSTD_CCtx_refPrefix(), but gives finer control over - * how to interpret prefix content (automatic ? force raw mode (default) ? full mode only ?) */ -ZSTDLIB_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType); - -/* === experimental parameters === */ -/* these parameters can be used with ZSTD_setParameter() - * they are not guaranteed to remain supported in the future */ - - /* Enables rsyncable mode, - * which makes compressed files more rsync friendly - * by adding periodic synchronization points to the compressed data. - * The target average block size is ZSTD_c_jobSize / 2. - * It's possible to modify the job size to increase or decrease - * the granularity of the synchronization point. - * Once the jobSize is smaller than the window size, - * it will result in compression ratio degradation. - * NOTE 1: rsyncable mode only works when multithreading is enabled. - * NOTE 2: rsyncable performs poorly in combination with long range mode, - * since it will decrease the effectiveness of synchronization points, - * though mileage may vary. - * NOTE 3: Rsyncable mode limits maximum compression speed to ~400 MB/s. - * If the selected compression level is already running significantly slower, - * the overall speed won't be significantly impacted. - */ - #define ZSTD_c_rsyncable ZSTD_c_experimentalParam1 - -/* Select a compression format. - * The value must be of type ZSTD_format_e. - * See ZSTD_format_e enum definition for details */ -#define ZSTD_c_format ZSTD_c_experimentalParam2 - -/* Force back-reference distances to remain < windowSize, - * even when referencing into Dictionary content (default:0) */ -#define ZSTD_c_forceMaxWindow ZSTD_c_experimentalParam3 - -/* Controls whether the contents of a CDict - * are used in place, or copied into the working context. - * Accepts values from the ZSTD_dictAttachPref_e enum. - * See the comments on that enum for an explanation of the feature. */ -#define ZSTD_c_forceAttachDict ZSTD_c_experimentalParam4 - -/* Controls how the literals are compressed (default is auto). - * The value must be of type ZSTD_literalCompressionMode_e. - * See ZSTD_literalCompressionMode_t enum definition for details. - */ -#define ZSTD_c_literalCompressionMode ZSTD_c_experimentalParam5 - -/* Tries to fit compressed block size to be around targetCBlockSize. - * No target when targetCBlockSize == 0. - * There is no guarantee on compressed block size (default:0) */ -#define ZSTD_c_targetCBlockSize ZSTD_c_experimentalParam6 - -/* User's best guess of source size. - * Hint is not valid when srcSizeHint == 0. - * There is no guarantee that hint is close to actual source size, - * but compression ratio may regress significantly if guess considerably underestimates */ -#define ZSTD_c_srcSizeHint ZSTD_c_experimentalParam7 - -/*! ZSTD_CCtx_getParameter() : - * Get the requested compression parameter value, selected by enum ZSTD_cParameter, - * and store it into int* value. - * @return : 0, or an error code (which can be tested with ZSTD_isError()). - */ -ZSTDLIB_API size_t ZSTD_CCtx_getParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int* value); - - -/*! ZSTD_CCtx_params : - * Quick howto : - * - ZSTD_createCCtxParams() : Create a ZSTD_CCtx_params structure - * - ZSTD_CCtxParams_setParameter() : Push parameters one by one into - * an existing ZSTD_CCtx_params structure. - * This is similar to - * ZSTD_CCtx_setParameter(). - * - ZSTD_CCtx_setParametersUsingCCtxParams() : Apply parameters to - * an existing CCtx. - * These parameters will be applied to - * all subsequent frames. - * - ZSTD_compressStream2() : Do compression using the CCtx. - * - ZSTD_freeCCtxParams() : Free the memory. - * - * This can be used with ZSTD_estimateCCtxSize_advanced_usingCCtxParams() - * for static allocation of CCtx for single-threaded compression. - */ -ZSTDLIB_API ZSTD_CCtx_params* ZSTD_createCCtxParams(void); -ZSTDLIB_API size_t ZSTD_freeCCtxParams(ZSTD_CCtx_params* params); - -/*! ZSTD_CCtxParams_reset() : - * Reset params to default values. - */ -ZSTDLIB_API size_t ZSTD_CCtxParams_reset(ZSTD_CCtx_params* params); - -/*! ZSTD_CCtxParams_init() : - * Initializes the compression parameters of cctxParams according to - * compression level. All other parameters are reset to their default values. - */ -ZSTDLIB_API size_t ZSTD_CCtxParams_init(ZSTD_CCtx_params* cctxParams, int compressionLevel); - -/*! ZSTD_CCtxParams_init_advanced() : - * Initializes the compression and frame parameters of cctxParams according to - * params. All other parameters are reset to their default values. - */ -ZSTDLIB_API size_t ZSTD_CCtxParams_init_advanced(ZSTD_CCtx_params* cctxParams, ZSTD_parameters params); - -/*! ZSTD_CCtxParams_setParameter() : - * Similar to ZSTD_CCtx_setParameter. - * Set one compression parameter, selected by enum ZSTD_cParameter. - * Parameters must be applied to a ZSTD_CCtx using ZSTD_CCtx_setParametersUsingCCtxParams(). - * @result : 0, or an error code (which can be tested with ZSTD_isError()). - */ -ZSTDLIB_API size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* params, ZSTD_cParameter param, int value); - -/*! ZSTD_CCtxParams_getParameter() : - * Similar to ZSTD_CCtx_getParameter. - * Get the requested value of one compression parameter, selected by enum ZSTD_cParameter. - * @result : 0, or an error code (which can be tested with ZSTD_isError()). - */ -ZSTDLIB_API size_t ZSTD_CCtxParams_getParameter(ZSTD_CCtx_params* params, ZSTD_cParameter param, int* value); - -/*! ZSTD_CCtx_setParametersUsingCCtxParams() : - * Apply a set of ZSTD_CCtx_params to the compression context. - * This can be done even after compression is started, - * if nbWorkers==0, this will have no impact until a new compression is started. - * if nbWorkers>=1, new parameters will be picked up at next job, - * with a few restrictions (windowLog, pledgedSrcSize, nbWorkers, jobSize, and overlapLog are not updated). - */ -ZSTDLIB_API size_t ZSTD_CCtx_setParametersUsingCCtxParams( - ZSTD_CCtx* cctx, const ZSTD_CCtx_params* params); - -/*! ZSTD_compressStream2_simpleArgs() : - * Same as ZSTD_compressStream2(), - * but using only integral types as arguments. - * This variant might be helpful for binders from dynamic languages - * which have troubles handling structures containing memory pointers. - */ -ZSTDLIB_API size_t ZSTD_compressStream2_simpleArgs ( - ZSTD_CCtx* cctx, - void* dst, size_t dstCapacity, size_t* dstPos, - const void* src, size_t srcSize, size_t* srcPos, - ZSTD_EndDirective endOp); - - -/*************************************** -* Advanced decompression functions -***************************************/ - -/*! ZSTD_isFrame() : - * Tells if the content of `buffer` starts with a valid Frame Identifier. - * Note : Frame Identifier is 4 bytes. If `size < 4`, @return will always be 0. - * Note 2 : Legacy Frame Identifiers are considered valid only if Legacy Support is enabled. - * Note 3 : Skippable Frame Identifiers are considered valid. */ -ZSTDLIB_API unsigned ZSTD_isFrame(const void* buffer, size_t size); - -/*! ZSTD_createDDict_byReference() : - * Create a digested dictionary, ready to start decompression operation without startup delay. - * Dictionary content is referenced, and therefore stays in dictBuffer. - * It is important that dictBuffer outlives DDict, - * it must remain read accessible throughout the lifetime of DDict */ -ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict_byReference(const void* dictBuffer, size_t dictSize); - -/*! ZSTD_DCtx_loadDictionary_byReference() : - * Same as ZSTD_DCtx_loadDictionary(), - * but references `dict` content instead of copying it into `dctx`. - * This saves memory if `dict` remains around., - * However, it's imperative that `dict` remains accessible (and unmodified) while being used, so it must outlive decompression. */ -ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary_byReference(ZSTD_DCtx* dctx, const void* dict, size_t dictSize); - -/*! ZSTD_DCtx_loadDictionary_advanced() : - * Same as ZSTD_DCtx_loadDictionary(), - * but gives direct control over - * how to load the dictionary (by copy ? by reference ?) - * and how to interpret it (automatic ? force raw mode ? full mode only ?). */ -ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary_advanced(ZSTD_DCtx* dctx, const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType); - -/*! ZSTD_DCtx_refPrefix_advanced() : - * Same as ZSTD_DCtx_refPrefix(), but gives finer control over - * how to interpret prefix content (automatic ? force raw mode (default) ? full mode only ?) */ -ZSTDLIB_API size_t ZSTD_DCtx_refPrefix_advanced(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType); - -/*! ZSTD_DCtx_setMaxWindowSize() : - * Refuses allocating internal buffers for frames requiring a window size larger than provided limit. - * This protects a decoder context from reserving too much memory for itself (potential attack scenario). - * This parameter is only useful in streaming mode, since no internal buffer is allocated in single-pass mode. - * By default, a decompression context accepts all window sizes <= (1 << ZSTD_WINDOWLOG_LIMIT_DEFAULT) - * @return : 0, or an error code (which can be tested using ZSTD_isError()). - */ -ZSTDLIB_API size_t ZSTD_DCtx_setMaxWindowSize(ZSTD_DCtx* dctx, size_t maxWindowSize); - -/* ZSTD_d_format - * experimental parameter, - * allowing selection between ZSTD_format_e input compression formats - */ -#define ZSTD_d_format ZSTD_d_experimentalParam1 -/* ZSTD_d_stableOutBuffer - * Experimental parameter. - * Default is 0 == disabled. Set to 1 to enable. - * - * Tells the decompressor that the ZSTD_outBuffer will ALWAYS be the same - * between calls, except for the modifications that zstd makes to pos (the - * caller must not modify pos). This is checked by the decompressor, and - * decompression will fail if it ever changes. Therefore the ZSTD_outBuffer - * MUST be large enough to fit the entire decompressed frame. This will be - * checked when the frame content size is known. The data in the ZSTD_outBuffer - * in the range [dst, dst + pos) MUST not be modified during decompression - * or you will get data corruption. - * - * When this flags is enabled zstd won't allocate an output buffer, because - * it can write directly to the ZSTD_outBuffer, but it will still allocate - * an input buffer large enough to fit any compressed block. This will also - * avoid the memcpy() from the internal output buffer to the ZSTD_outBuffer. - * If you need to avoid the input buffer allocation use the buffer-less - * streaming API. - * - * NOTE: So long as the ZSTD_outBuffer always points to valid memory, using - * this flag is ALWAYS memory safe, and will never access out-of-bounds - * memory. However, decompression WILL fail if you violate the preconditions. - * - * WARNING: The data in the ZSTD_outBuffer in the range [dst, dst + pos) MUST - * not be modified during decompression or you will get data corruption. This - * is because zstd needs to reference data in the ZSTD_outBuffer to regenerate - * matches. Normally zstd maintains its own buffer for this purpose, but passing - * this flag tells zstd to use the user provided buffer. - */ -#define ZSTD_d_stableOutBuffer ZSTD_d_experimentalParam2 - -/*! ZSTD_DCtx_setFormat() : - * Instruct the decoder context about what kind of data to decode next. - * This instruction is mandatory to decode data without a fully-formed header, - * such ZSTD_f_zstd1_magicless for example. - * @return : 0, or an error code (which can be tested using ZSTD_isError()). */ -ZSTDLIB_API size_t ZSTD_DCtx_setFormat(ZSTD_DCtx* dctx, ZSTD_format_e format); - -/*! ZSTD_decompressStream_simpleArgs() : - * Same as ZSTD_decompressStream(), - * but using only integral types as arguments. - * This can be helpful for binders from dynamic languages - * which have troubles handling structures containing memory pointers. - */ -ZSTDLIB_API size_t ZSTD_decompressStream_simpleArgs ( - ZSTD_DCtx* dctx, - void* dst, size_t dstCapacity, size_t* dstPos, - const void* src, size_t srcSize, size_t* srcPos); - - -/******************************************************************** -* Advanced streaming functions -* Warning : most of these functions are now redundant with the Advanced API. -* Once Advanced API reaches "stable" status, -* redundant functions will be deprecated, and then at some point removed. -********************************************************************/ - -/*===== Advanced Streaming compression functions =====*/ -/**! ZSTD_initCStream_srcSize() : - * This function is deprecated, and equivalent to: - * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); - * ZSTD_CCtx_refCDict(zcs, NULL); // clear the dictionary (if any) - * ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel); - * ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize); - * - * pledgedSrcSize must be correct. If it is not known at init time, use - * ZSTD_CONTENTSIZE_UNKNOWN. Note that, for compatibility with older programs, - * "0" also disables frame content size field. It may be enabled in the future. - * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x - */ -ZSTDLIB_API size_t -ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, - int compressionLevel, - unsigned long long pledgedSrcSize); - -/**! ZSTD_initCStream_usingDict() : - * This function is deprecated, and is equivalent to: - * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); - * ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel); - * ZSTD_CCtx_loadDictionary(zcs, dict, dictSize); - * - * Creates of an internal CDict (incompatible with static CCtx), except if - * dict == NULL or dictSize < 8, in which case no dict is used. - * Note: dict is loaded with ZSTD_dct_auto (treated as a full zstd dictionary if - * it begins with ZSTD_MAGIC_DICTIONARY, else as raw content) and ZSTD_dlm_byCopy. - * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x - */ -ZSTDLIB_API size_t -ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, - const void* dict, size_t dictSize, - int compressionLevel); - -/**! ZSTD_initCStream_advanced() : - * This function is deprecated, and is approximately equivalent to: - * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); - * // Pseudocode: Set each zstd parameter and leave the rest as-is. - * for ((param, value) : params) { - * ZSTD_CCtx_setParameter(zcs, param, value); - * } - * ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize); - * ZSTD_CCtx_loadDictionary(zcs, dict, dictSize); - * - * dict is loaded with ZSTD_dct_auto and ZSTD_dlm_byCopy. - * pledgedSrcSize must be correct. - * If srcSize is not known at init time, use value ZSTD_CONTENTSIZE_UNKNOWN. - * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x - */ -ZSTDLIB_API size_t -ZSTD_initCStream_advanced(ZSTD_CStream* zcs, - const void* dict, size_t dictSize, - ZSTD_parameters params, - unsigned long long pledgedSrcSize); - -/**! ZSTD_initCStream_usingCDict() : - * This function is deprecated, and equivalent to: - * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); - * ZSTD_CCtx_refCDict(zcs, cdict); - * - * note : cdict will just be referenced, and must outlive compression session - * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x - */ -ZSTDLIB_API size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict); - -/**! ZSTD_initCStream_usingCDict_advanced() : - * This function is DEPRECATED, and is approximately equivalent to: - * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); - * // Pseudocode: Set each zstd frame parameter and leave the rest as-is. - * for ((fParam, value) : fParams) { - * ZSTD_CCtx_setParameter(zcs, fParam, value); - * } - * ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize); - * ZSTD_CCtx_refCDict(zcs, cdict); - * - * same as ZSTD_initCStream_usingCDict(), with control over frame parameters. - * pledgedSrcSize must be correct. If srcSize is not known at init time, use - * value ZSTD_CONTENTSIZE_UNKNOWN. - * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x - */ -ZSTDLIB_API size_t -ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs, - const ZSTD_CDict* cdict, - ZSTD_frameParameters fParams, - unsigned long long pledgedSrcSize); - -/*! ZSTD_resetCStream() : - * This function is deprecated, and is equivalent to: - * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); - * ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize); - * - * start a new frame, using same parameters from previous frame. - * This is typically useful to skip dictionary loading stage, since it will re-use it in-place. - * Note that zcs must be init at least once before using ZSTD_resetCStream(). - * If pledgedSrcSize is not known at reset time, use macro ZSTD_CONTENTSIZE_UNKNOWN. - * If pledgedSrcSize > 0, its value must be correct, as it will be written in header, and controlled at the end. - * For the time being, pledgedSrcSize==0 is interpreted as "srcSize unknown" for compatibility with older programs, - * but it will change to mean "empty" in future version, so use macro ZSTD_CONTENTSIZE_UNKNOWN instead. - * @return : 0, or an error code (which can be tested using ZSTD_isError()) - * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x - */ -ZSTDLIB_API size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize); - - -typedef struct { - unsigned long long ingested; /* nb input bytes read and buffered */ - unsigned long long consumed; /* nb input bytes actually compressed */ - unsigned long long produced; /* nb of compressed bytes generated and buffered */ - unsigned long long flushed; /* nb of compressed bytes flushed : not provided; can be tracked from caller side */ - unsigned currentJobID; /* MT only : latest started job nb */ - unsigned nbActiveWorkers; /* MT only : nb of workers actively compressing at probe time */ -} ZSTD_frameProgression; - -/* ZSTD_getFrameProgression() : - * tells how much data has been ingested (read from input) - * consumed (input actually compressed) and produced (output) for current frame. - * Note : (ingested - consumed) is amount of input data buffered internally, not yet compressed. - * Aggregates progression inside active worker threads. - */ -ZSTDLIB_API ZSTD_frameProgression ZSTD_getFrameProgression(const ZSTD_CCtx* cctx); - -/*! ZSTD_toFlushNow() : - * Tell how many bytes are ready to be flushed immediately. - * Useful for multithreading scenarios (nbWorkers >= 1). - * Probe the oldest active job, defined as oldest job not yet entirely flushed, - * and check its output buffer. - * @return : amount of data stored in oldest job and ready to be flushed immediately. - * if @return == 0, it means either : - * + there is no active job (could be checked with ZSTD_frameProgression()), or - * + oldest job is still actively compressing data, - * but everything it has produced has also been flushed so far, - * therefore flush speed is limited by production speed of oldest job - * irrespective of the speed of concurrent (and newer) jobs. - */ -ZSTDLIB_API size_t ZSTD_toFlushNow(ZSTD_CCtx* cctx); - - -/*===== Advanced Streaming decompression functions =====*/ -/** - * This function is deprecated, and is equivalent to: - * - * ZSTD_DCtx_reset(zds, ZSTD_reset_session_only); - * ZSTD_DCtx_loadDictionary(zds, dict, dictSize); - * - * note: no dictionary will be used if dict == NULL or dictSize < 8 - * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x - */ -ZSTDLIB_API size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize); - -/** - * This function is deprecated, and is equivalent to: - * - * ZSTD_DCtx_reset(zds, ZSTD_reset_session_only); - * ZSTD_DCtx_refDDict(zds, ddict); - * - * note : ddict is referenced, it must outlive decompression session - * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x - */ -ZSTDLIB_API size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* zds, const ZSTD_DDict* ddict); - -/** - * This function is deprecated, and is equivalent to: - * - * ZSTD_DCtx_reset(zds, ZSTD_reset_session_only); - * - * re-use decompression parameters from previous init; saves dictionary loading - * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x - */ -ZSTDLIB_API size_t ZSTD_resetDStream(ZSTD_DStream* zds); - - -/********************************************************************* -* Buffer-less and synchronous inner streaming functions -* -* This is an advanced API, giving full control over buffer management, for users which need direct control over memory. -* But it's also a complex one, with several restrictions, documented below. -* Prefer normal streaming API for an easier experience. -********************************************************************* */ - -/** - Buffer-less streaming compression (synchronous mode) - - A ZSTD_CCtx object is required to track streaming operations. - Use ZSTD_createCCtx() / ZSTD_freeCCtx() to manage resource. - ZSTD_CCtx object can be re-used multiple times within successive compression operations. - - Start by initializing a context. - Use ZSTD_compressBegin(), or ZSTD_compressBegin_usingDict() for dictionary compression, - or ZSTD_compressBegin_advanced(), for finer parameter control. - It's also possible to duplicate a reference context which has already been initialized, using ZSTD_copyCCtx() - - Then, consume your input using ZSTD_compressContinue(). - There are some important considerations to keep in mind when using this advanced function : - - ZSTD_compressContinue() has no internal buffer. It uses externally provided buffers only. - - Interface is synchronous : input is consumed entirely and produces 1+ compressed blocks. - - Caller must ensure there is enough space in `dst` to store compressed data under worst case scenario. - Worst case evaluation is provided by ZSTD_compressBound(). - ZSTD_compressContinue() doesn't guarantee recover after a failed compression. - - ZSTD_compressContinue() presumes prior input ***is still accessible and unmodified*** (up to maximum distance size, see WindowLog). - It remembers all previous contiguous blocks, plus one separated memory segment (which can itself consists of multiple contiguous blocks) - - ZSTD_compressContinue() detects that prior input has been overwritten when `src` buffer overlaps. - In which case, it will "discard" the relevant memory section from its history. - - Finish a frame with ZSTD_compressEnd(), which will write the last block(s) and optional checksum. - It's possible to use srcSize==0, in which case, it will write a final empty block to end the frame. - Without last block mark, frames are considered unfinished (hence corrupted) by compliant decoders. - - `ZSTD_CCtx` object can be re-used (ZSTD_compressBegin()) to compress again. -*/ - -/*===== Buffer-less streaming compression functions =====*/ -ZSTDLIB_API size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel); -ZSTDLIB_API size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel); -ZSTDLIB_API size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_parameters params, unsigned long long pledgedSrcSize); /**< pledgedSrcSize : If srcSize is not known at init time, use ZSTD_CONTENTSIZE_UNKNOWN */ -ZSTDLIB_API size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict); /**< note: fails if cdict==NULL */ -ZSTDLIB_API size_t ZSTD_compressBegin_usingCDict_advanced(ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict, ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize); /* compression parameters are already set within cdict. pledgedSrcSize must be correct. If srcSize is not known, use macro ZSTD_CONTENTSIZE_UNKNOWN */ -ZSTDLIB_API size_t ZSTD_copyCCtx(ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx, unsigned long long pledgedSrcSize); /**< note: if pledgedSrcSize is not known, use ZSTD_CONTENTSIZE_UNKNOWN */ - -ZSTDLIB_API size_t ZSTD_compressContinue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); -ZSTDLIB_API size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); - - -/*- - Buffer-less streaming decompression (synchronous mode) - - A ZSTD_DCtx object is required to track streaming operations. - Use ZSTD_createDCtx() / ZSTD_freeDCtx() to manage it. - A ZSTD_DCtx object can be re-used multiple times. - - First typical operation is to retrieve frame parameters, using ZSTD_getFrameHeader(). - Frame header is extracted from the beginning of compressed frame, so providing only the frame's beginning is enough. - Data fragment must be large enough to ensure successful decoding. - `ZSTD_frameHeaderSize_max` bytes is guaranteed to always be large enough. - @result : 0 : successful decoding, the `ZSTD_frameHeader` structure is correctly filled. - >0 : `srcSize` is too small, please provide at least @result bytes on next attempt. - errorCode, which can be tested using ZSTD_isError(). - - It fills a ZSTD_frameHeader structure with important information to correctly decode the frame, - such as the dictionary ID, content size, or maximum back-reference distance (`windowSize`). - Note that these values could be wrong, either because of data corruption, or because a 3rd party deliberately spoofs false information. - As a consequence, check that values remain within valid application range. - For example, do not allocate memory blindly, check that `windowSize` is within expectation. - Each application can set its own limits, depending on local restrictions. - For extended interoperability, it is recommended to support `windowSize` of at least 8 MB. - - ZSTD_decompressContinue() needs previous data blocks during decompression, up to `windowSize` bytes. - ZSTD_decompressContinue() is very sensitive to contiguity, - if 2 blocks don't follow each other, make sure that either the compressor breaks contiguity at the same place, - or that previous contiguous segment is large enough to properly handle maximum back-reference distance. - There are multiple ways to guarantee this condition. - - The most memory efficient way is to use a round buffer of sufficient size. - Sufficient size is determined by invoking ZSTD_decodingBufferSize_min(), - which can @return an error code if required value is too large for current system (in 32-bits mode). - In a round buffer methodology, ZSTD_decompressContinue() decompresses each block next to previous one, - up to the moment there is not enough room left in the buffer to guarantee decoding another full block, - which maximum size is provided in `ZSTD_frameHeader` structure, field `blockSizeMax`. - At which point, decoding can resume from the beginning of the buffer. - Note that already decoded data stored in the buffer should be flushed before being overwritten. - - There are alternatives possible, for example using two or more buffers of size `windowSize` each, though they consume more memory. - - Finally, if you control the compression process, you can also ignore all buffer size rules, - as long as the encoder and decoder progress in "lock-step", - aka use exactly the same buffer sizes, break contiguity at the same place, etc. - - Once buffers are setup, start decompression, with ZSTD_decompressBegin(). - If decompression requires a dictionary, use ZSTD_decompressBegin_usingDict() or ZSTD_decompressBegin_usingDDict(). - - Then use ZSTD_nextSrcSizeToDecompress() and ZSTD_decompressContinue() alternatively. - ZSTD_nextSrcSizeToDecompress() tells how many bytes to provide as 'srcSize' to ZSTD_decompressContinue(). - ZSTD_decompressContinue() requires this _exact_ amount of bytes, or it will fail. - - @result of ZSTD_decompressContinue() is the number of bytes regenerated within 'dst' (necessarily <= dstCapacity). - It can be zero : it just means ZSTD_decompressContinue() has decoded some metadata item. - It can also be an error code, which can be tested with ZSTD_isError(). - - A frame is fully decoded when ZSTD_nextSrcSizeToDecompress() returns zero. - Context can then be reset to start a new decompression. - - Note : it's possible to know if next input to present is a header or a block, using ZSTD_nextInputType(). - This information is not required to properly decode a frame. - - == Special case : skippable frames == - - Skippable frames allow integration of user-defined data into a flow of concatenated frames. - Skippable frames will be ignored (skipped) by decompressor. - The format of skippable frames is as follows : - a) Skippable frame ID - 4 Bytes, Little endian format, any value from 0x184D2A50 to 0x184D2A5F - b) Frame Size - 4 Bytes, Little endian format, unsigned 32-bits - c) Frame Content - any content (User Data) of length equal to Frame Size - For skippable frames ZSTD_getFrameHeader() returns zfhPtr->frameType==ZSTD_skippableFrame. - For skippable frames ZSTD_decompressContinue() always returns 0 : it only skips the content. -*/ - -/*===== Buffer-less streaming decompression functions =====*/ -typedef enum { ZSTD_frame, ZSTD_skippableFrame } ZSTD_frameType_e; -typedef struct { - unsigned long long frameContentSize; /* if == ZSTD_CONTENTSIZE_UNKNOWN, it means this field is not available. 0 means "empty" */ - unsigned long long windowSize; /* can be very large, up to <= frameContentSize */ - unsigned blockSizeMax; - ZSTD_frameType_e frameType; /* if == ZSTD_skippableFrame, frameContentSize is the size of skippable content */ - unsigned headerSize; - unsigned dictID; - unsigned checksumFlag; -} ZSTD_frameHeader; - -/*! ZSTD_getFrameHeader() : - * decode Frame Header, or requires larger `srcSize`. - * @return : 0, `zfhPtr` is correctly filled, - * >0, `srcSize` is too small, value is wanted `srcSize` amount, - * or an error code, which can be tested using ZSTD_isError() */ -ZSTDLIB_API size_t ZSTD_getFrameHeader(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize); /**< doesn't consume input */ -/*! ZSTD_getFrameHeader_advanced() : - * same as ZSTD_getFrameHeader(), - * with added capability to select a format (like ZSTD_f_zstd1_magicless) */ -ZSTDLIB_API size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize, ZSTD_format_e format); -ZSTDLIB_API size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long long frameContentSize); /**< when frame content size is not known, pass in frameContentSize == ZSTD_CONTENTSIZE_UNKNOWN */ - -ZSTDLIB_API size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx); -ZSTDLIB_API size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx* dctx, const void* dict, size_t dictSize); -ZSTDLIB_API size_t ZSTD_decompressBegin_usingDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict); - -ZSTDLIB_API size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx); -ZSTDLIB_API size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); - -/* misc */ -ZSTDLIB_API void ZSTD_copyDCtx(ZSTD_DCtx* dctx, const ZSTD_DCtx* preparedDCtx); -typedef enum { ZSTDnit_frameHeader, ZSTDnit_blockHeader, ZSTDnit_block, ZSTDnit_lastBlock, ZSTDnit_checksum, ZSTDnit_skippableFrame } ZSTD_nextInputType_e; -ZSTDLIB_API ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx); - - - - -/* ============================ */ -/** Block level API */ -/* ============================ */ - -/*! - Block functions produce and decode raw zstd blocks, without frame metadata. - Frame metadata cost is typically ~12 bytes, which can be non-negligible for very small blocks (< 100 bytes). - But users will have to take in charge needed metadata to regenerate data, such as compressed and content sizes. - - A few rules to respect : - - Compressing and decompressing require a context structure - + Use ZSTD_createCCtx() and ZSTD_createDCtx() - - It is necessary to init context before starting - + compression : any ZSTD_compressBegin*() variant, including with dictionary - + decompression : any ZSTD_decompressBegin*() variant, including with dictionary - + copyCCtx() and copyDCtx() can be used too - - Block size is limited, it must be <= ZSTD_getBlockSize() <= ZSTD_BLOCKSIZE_MAX == 128 KB - + If input is larger than a block size, it's necessary to split input data into multiple blocks - + For inputs larger than a single block, consider using regular ZSTD_compress() instead. - Frame metadata is not that costly, and quickly becomes negligible as source size grows larger than a block. - - When a block is considered not compressible enough, ZSTD_compressBlock() result will be 0 (zero) ! - ===> In which case, nothing is produced into `dst` ! - + User __must__ test for such outcome and deal directly with uncompressed data - + A block cannot be declared incompressible if ZSTD_compressBlock() return value was != 0. - Doing so would mess up with statistics history, leading to potential data corruption. - + ZSTD_decompressBlock() _doesn't accept uncompressed data as input_ !! - + In case of multiple successive blocks, should some of them be uncompressed, - decoder must be informed of their existence in order to follow proper history. - Use ZSTD_insertBlock() for such a case. -*/ - -/*===== Raw zstd block functions =====*/ -ZSTDLIB_API size_t ZSTD_getBlockSize (const ZSTD_CCtx* cctx); -ZSTDLIB_API size_t ZSTD_compressBlock (ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); -ZSTDLIB_API size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); -ZSTDLIB_API size_t ZSTD_insertBlock (ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize); /**< insert uncompressed block into `dctx` history. Useful for multi-blocks decompression. */ - -} - -#endif /* ZSTD_H_ZSTD_STATIC_LINKING_ONLY */ - - -// LICENSE_CHANGE_END - - - - - - - -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #4 -// See the end of this file for a list - -/* - * xxHash - Extremely Fast Hash algorithm - * Header File - * Copyright (c) 2012-2020, Yann Collet, Facebook, Inc. - * - * You can contact the author at : - * - xxHash source repository : https://github.com/Cyan4973/xxHash - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. -*/ - -/* Notice extracted from xxHash homepage : - -xxHash is an extremely fast Hash algorithm, running at RAM speed limits. -It also successfully passes all tests from the SMHasher suite. - -Comparison (single thread, Windows Seven 32 bits, using SMHasher on a Core 2 Duo @3GHz) - -Name Speed Q.Score Author -xxHash 5.4 GB/s 10 -CrapWow 3.2 GB/s 2 Andrew -MumurHash 3a 2.7 GB/s 10 Austin Appleby -SpookyHash 2.0 GB/s 10 Bob Jenkins -SBox 1.4 GB/s 9 Bret Mulvey -Lookup3 1.2 GB/s 9 Bob Jenkins -SuperFastHash 1.2 GB/s 1 Paul Hsieh -CityHash64 1.05 GB/s 10 Pike & Alakuijala -FNV 0.55 GB/s 5 Fowler, Noll, Vo -CRC32 0.43 GB/s 9 -MD5-32 0.33 GB/s 10 Ronald L. Rivest -SHA1-32 0.28 GB/s 10 - -Q.Score is a measure of quality of the hash function. -It depends on successfully passing SMHasher test set. -10 is a perfect score. - -A 64-bits version, named XXH64, is available since r35. -It offers much better speed, but for 64-bits applications only. -Name Speed on 64 bits Speed on 32 bits -XXH64 13.8 GB/s 1.9 GB/s -XXH32 6.8 GB/s 6.0 GB/s -*/ - -#ifndef XXHASH_H_5627135585666179 -#define XXHASH_H_5627135585666179 1 - - -/* **************************** -* Definitions -******************************/ -#include /* size_t */ -namespace duckdb_zstd { -typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode; - - -/* **************************** -* API modifier -******************************/ -/** XXH_PRIVATE_API -* This is useful if you want to include xxhash functions in `static` mode -* in order to inline them, and remove their symbol from the public list. -* Methodology : -* #define XXH_PRIVATE_API -* #include "zstd/common/xxhash.h" -* `xxhash.c` is automatically included. -* It's not useful to compile and link it as a separate module anymore. -*/ -#ifdef XXH_PRIVATE_API -# ifndef XXH_STATIC_LINKING_ONLY -# define XXH_STATIC_LINKING_ONLY -# endif -# if defined(__GNUC__) -# define XXH_PUBLIC_API static __inline __attribute__((unused)) -# elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) -# define XXH_PUBLIC_API static inline -# elif defined(_MSC_VER) -# define XXH_PUBLIC_API static __inline -# else -# define XXH_PUBLIC_API static /* this version may generate warnings for unused static functions; disable the relevant warning */ -# endif -#else -# define XXH_PUBLIC_API /* do nothing */ -#endif /* XXH_PRIVATE_API */ - -/*!XXH_NAMESPACE, aka Namespace Emulation : - -If you want to include _and expose_ xxHash functions from within your own library, -but also want to avoid symbol collisions with another library which also includes xxHash, - -you can use XXH_NAMESPACE, to automatically prefix any public symbol from xxhash library -with the value of XXH_NAMESPACE (so avoid to keep it NULL and avoid numeric values). - -Note that no change is required within the calling program as long as it includes `xxhash.h` : -regular symbol name will be automatically translated by this header. -*/ -#ifdef XXH_NAMESPACE -# define XXH_CAT(A,B) A##B -# define XXH_NAME2(A,B) XXH_CAT(A,B) -# define XXH32 XXH_NAME2(XXH_NAMESPACE, XXH32) -# define XXH64 XXH_NAME2(XXH_NAMESPACE, XXH64) -# define XXH_versionNumber XXH_NAME2(XXH_NAMESPACE, XXH_versionNumber) -# define XXH32_createState XXH_NAME2(XXH_NAMESPACE, XXH32_createState) -# define XXH64_createState XXH_NAME2(XXH_NAMESPACE, XXH64_createState) -# define XXH32_freeState XXH_NAME2(XXH_NAMESPACE, XXH32_freeState) -# define XXH64_freeState XXH_NAME2(XXH_NAMESPACE, XXH64_freeState) -# define XXH32_reset XXH_NAME2(XXH_NAMESPACE, XXH32_reset) -# define XXH64_reset XXH_NAME2(XXH_NAMESPACE, XXH64_reset) -# define XXH32_update XXH_NAME2(XXH_NAMESPACE, XXH32_update) -# define XXH64_update XXH_NAME2(XXH_NAMESPACE, XXH64_update) -# define XXH32_digest XXH_NAME2(XXH_NAMESPACE, XXH32_digest) -# define XXH64_digest XXH_NAME2(XXH_NAMESPACE, XXH64_digest) -# define XXH32_copyState XXH_NAME2(XXH_NAMESPACE, XXH32_copyState) -# define XXH64_copyState XXH_NAME2(XXH_NAMESPACE, XXH64_copyState) -# define XXH32_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH32_canonicalFromHash) -# define XXH64_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH64_canonicalFromHash) -# define XXH32_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH32_hashFromCanonical) -# define XXH64_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH64_hashFromCanonical) -#endif - - -/* ************************************* -* Version -***************************************/ -#define XXH_VERSION_MAJOR 0 -#define XXH_VERSION_MINOR 6 -#define XXH_VERSION_RELEASE 2 -#define XXH_VERSION_NUMBER (XXH_VERSION_MAJOR *100*100 + XXH_VERSION_MINOR *100 + XXH_VERSION_RELEASE) -XXH_PUBLIC_API unsigned XXH_versionNumber (void); - - -/* **************************** -* Simple Hash Functions -******************************/ -typedef unsigned int XXH32_hash_t; -typedef unsigned long long XXH64_hash_t; - -XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t length, unsigned int seed); -XXH_PUBLIC_API XXH64_hash_t XXH64 (const void* input, size_t length, unsigned long long seed); - -/*! -XXH32() : - Calculate the 32-bits hash of sequence "length" bytes stored at memory address "input". - The memory between input & input+length must be valid (allocated and read-accessible). - "seed" can be used to alter the result predictably. - Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark) : 5.4 GB/s -XXH64() : - Calculate the 64-bits hash of sequence of length "len" stored at memory address "input". - "seed" can be used to alter the result predictably. - This function runs 2x faster on 64-bits systems, but slower on 32-bits systems (see benchmark). -*/ - - -/* **************************** -* Streaming Hash Functions -******************************/ -typedef struct XXH32_state_s XXH32_state_t; /* incomplete type */ -typedef struct XXH64_state_s XXH64_state_t; /* incomplete type */ - -/*! State allocation, compatible with dynamic libraries */ - -XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void); -XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr); - -XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void); -XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr); - - -/* hash streaming */ - -XXH_PUBLIC_API XXH_errorcode XXH32_reset (XXH32_state_t* statePtr, unsigned int seed); -XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* statePtr, const void* input, size_t length); -XXH_PUBLIC_API XXH32_hash_t XXH32_digest (const XXH32_state_t* statePtr); - -XXH_PUBLIC_API XXH_errorcode XXH64_reset (XXH64_state_t* statePtr, unsigned long long seed); -XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* statePtr, const void* input, size_t length); -XXH_PUBLIC_API XXH64_hash_t XXH64_digest (const XXH64_state_t* statePtr); - -/* -These functions generate the xxHash of an input provided in multiple segments. -Note that, for small input, they are slower than single-call functions, due to state management. -For small input, prefer `XXH32()` and `XXH64()` . - -XXH state must first be allocated, using XXH*_createState() . - -Start a new hash by initializing state with a seed, using XXH*_reset(). - -Then, feed the hash state by calling XXH*_update() as many times as necessary. -Obviously, input must be allocated and read accessible. -The function returns an error code, with 0 meaning OK, and any other value meaning there is an error. - -Finally, a hash value can be produced anytime, by using XXH*_digest(). -This function returns the nn-bits hash as an int or long long. - -It's still possible to continue inserting input into the hash state after a digest, -and generate some new hashes later on, by calling again XXH*_digest(). - -When done, free XXH state space if it was allocated dynamically. -*/ - - -/* ************************** -* Utils -****************************/ -#if !(defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) /* ! C99 */ -# define __restrict /* disable restrict */ -#endif - -XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* __restrict dst_state, const XXH32_state_t* __restrict src_state); -XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* __restrict dst_state, const XXH64_state_t* __restrict src_state); - - -/* ************************** -* Canonical representation -****************************/ -/* Default result type for XXH functions are primitive unsigned 32 and 64 bits. -* The canonical representation uses human-readable write convention, aka big-endian (large digits first). -* These functions allow transformation of hash result into and from its canonical format. -* This way, hash values can be written into a file / memory, and remain comparable on different systems and programs. -*/ -typedef struct { unsigned char digest[4]; } XXH32_canonical_t; -typedef struct { unsigned char digest[8]; } XXH64_canonical_t; - -XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash); -XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash); - -XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src); -XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src); - -} - -#endif /* XXHASH_H_5627135585666179 */ - - - -// LICENSE_CHANGE_END - /* XXH_reset, update, digest */ - - -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #4 -// See the end of this file for a list - - -/* ================================================================================================ - This section contains definitions which are not guaranteed to remain stable. - They may change in future versions, becoming incompatible with a different version of the library. - They shall only be used with static linking. - Never use these definitions in association with dynamic linking ! -=================================================================================================== */ -#ifndef XXH_STATIC_H_3543687687345 -#define XXH_STATIC_H_3543687687345 - -namespace duckdb_zstd { - -/* These definitions are only meant to allow allocation of XXH state - statically, on stack, or in a struct for example. - Do not use members directly. */ - - struct XXH32_state_s { - unsigned total_len_32; - unsigned large_len; - unsigned v1; - unsigned v2; - unsigned v3; - unsigned v4; - unsigned mem32[4]; /* buffer defined as U32 for alignment */ - unsigned memsize; - unsigned reserved; /* never read nor write, will be removed in a future version */ - }; /* typedef'd to XXH32_state_t */ - - struct XXH64_state_s { - unsigned long long total_len; - unsigned long long v1; - unsigned long long v2; - unsigned long long v3; - unsigned long long v4; - unsigned long long mem64[4]; /* buffer defined as U64 for alignment */ - unsigned memsize; - unsigned reserved[2]; /* never read nor write, will be removed in a future version */ - }; /* typedef'd to XXH64_state_t */ - -} -// # ifdef XXH_PRIVATE_API -// # include "xxhash.cpp" /* include xxhash functions as `static`, for inlining */ -// # endif - -#endif /* XXH_STATIC_LINKING_ONLY && XXH_STATIC_H_3543687687345 */ - - -// LICENSE_CHANGE_END - /* XXH_reset, update, digest */ - -namespace duckdb_zstd { - -/* ---- static assert (debug) --- */ -#define ZSTD_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c) -#define ZSTD_isError ERR_isError /* for inlining */ -// #define FSE_isError ERR_isError -// #define HUF_isError ERR_isError - - -/*-************************************* -* shared macros -***************************************/ -#undef MIN -#undef MAX -#define MIN(a,b) ((a)<(b) ? (a) : (b)) -#define MAX(a,b) ((a)>(b) ? (a) : (b)) - -/** - * Ignore: this is an internal helper. - * - * This is a helper function to help force C99-correctness during compilation. - * Under strict compilation modes, variadic macro arguments can't be empty. - * However, variadic function arguments can be. Using a function therefore lets - * us statically check that at least one (string) argument was passed, - * independent of the compilation flags. - */ -static INLINE_KEYWORD UNUSED_ATTR -void _force_has_format_string(const char *format, ...) { - (void)format; -} - -/** - * Ignore: this is an internal helper. - * - * We want to force this function invocation to be syntactically correct, but - * we don't want to force runtime evaluation of its arguments. - */ -#define _FORCE_HAS_FORMAT_STRING(...) \ - if (0) { \ - _force_has_format_string(__VA_ARGS__); \ - } - -/** - * Return the specified error if the condition evaluates to true. - * - * In debug modes, prints additional information. - * In order to do that (particularly, printing the conditional that failed), - * this can't just wrap RETURN_ERROR(). - */ -#define RETURN_ERROR_IF(cond, err, ...) \ - if (cond) { \ - RAWLOG(3, "%s:%d: ERROR!: check %s failed, returning %s", \ - __FILE__, __LINE__, ZSTD_QUOTE(cond), ZSTD_QUOTE(ERROR(err))); \ - _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \ - RAWLOG(3, ": " __VA_ARGS__); \ - RAWLOG(3, "\n"); \ - return ERROR(err); \ - } - -/** - * Unconditionally return the specified error. - * - * In debug modes, prints additional information. - */ -#define RETURN_ERROR(err, ...) \ - do { \ - RAWLOG(3, "%s:%d: ERROR!: unconditional check failed, returning %s", \ - __FILE__, __LINE__, ZSTD_QUOTE(ERROR(err))); \ - _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \ - RAWLOG(3, ": " __VA_ARGS__); \ - RAWLOG(3, "\n"); \ - return ERROR(err); \ - } while(0); - -/** - * If the provided expression evaluates to an error code, returns that error code. - * - * In debug modes, prints additional information. - */ -#define FORWARD_IF_ERROR(err, ...) \ - do { \ - size_t const err_code = (err); \ - if (ERR_isError(err_code)) { \ - RAWLOG(3, "%s:%d: ERROR!: forwarding error in %s: %s", \ - __FILE__, __LINE__, ZSTD_QUOTE(err), ERR_getErrorName(err_code)); \ - _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \ - RAWLOG(3, ": " __VA_ARGS__); \ - RAWLOG(3, "\n"); \ - return err_code; \ - } \ - } while(0); - - -/*-************************************* -* Common constants -***************************************/ -#define ZSTD_OPT_NUM (1<<12) - -#define ZSTD_REP_NUM 3 /* number of repcodes */ -#define ZSTD_REP_MOVE (ZSTD_REP_NUM-1) - -#define KB *(1 <<10) -#define MB *(1 <<20) -#define GB *(1U<<30) - -#define BIT7 128 -#define BIT6 64 -#define BIT5 32 -#define BIT4 16 -#define BIT1 2 -#define BIT0 1 - -#define ZSTD_WINDOWLOG_ABSOLUTEMIN 10 - -#define ZSTD_FRAMEIDSIZE 4 /* magic number size */ - -#define ZSTD_BLOCKHEADERSIZE 3 /* C standard doesn't allow `static const` variable to be init using another `static const` variable */ - -typedef enum { bt_raw, bt_rle, bt_compressed, bt_reserved } blockType_e; - -#define ZSTD_FRAMECHECKSUMSIZE 4 - -#define MIN_SEQUENCES_SIZE 1 /* nbSeq==0 */ -#define MIN_CBLOCK_SIZE (1 /*litCSize*/ + 1 /* RLE or RAW */ + MIN_SEQUENCES_SIZE /* nbSeq==0 */) /* for a non-null block */ - -#define HufLog 12 -typedef enum { set_basic, set_rle, set_compressed, set_repeat } symbolEncodingType_e; - -#define LONGNBSEQ 0x7F00 - -#define MINMATCH 3 - -#define Litbits 8 -#define MaxLit ((1<= 8 || (ovtype == ZSTD_no_overlap && diff <= -WILDCOPY_VECLEN)); - - if (ovtype == ZSTD_overlap_src_before_dst && diff < WILDCOPY_VECLEN) { - /* Handle short offset copies. */ - do { - COPY8(op, ip) - } while (op < oend); - } else { - assert(diff >= WILDCOPY_VECLEN || diff <= -WILDCOPY_VECLEN); - /* Separate out the first COPY16() call because the copy length is - * almost certain to be short, so the branches have different - * probabilities. Since it is almost certain to be short, only do - * one COPY16() in the first call. Then, do two calls per loop since - * at that point it is more likely to have a high trip count. - */ -#ifndef __aarch64__ - do { - COPY16(op, ip); - } - while (op < oend); -#else - COPY16(op, ip); - if (op >= oend) return; - do { - COPY16(op, ip); - COPY16(op, ip); - } - while (op < oend); -#endif - } -} - -MEM_STATIC size_t ZSTD_limitCopy(void* dst, size_t dstCapacity, const void* src, size_t srcSize) -{ - size_t const length = MIN(dstCapacity, srcSize); - if (length > 0) { - memcpy(dst, src, length); - } - return length; -} - -/* define "workspace is too large" as this number of times larger than needed */ -#define ZSTD_WORKSPACETOOLARGE_FACTOR 3 - -/* when workspace is continuously too large - * during at least this number of times, - * context's memory usage is considered wasteful, - * because it's sized to handle a worst case scenario which rarely happens. - * In which case, resize it down to free some memory */ -#define ZSTD_WORKSPACETOOLARGE_MAXDURATION 128 - - -/*-******************************************* -* Private declarations -*********************************************/ -typedef struct seqDef_s { - U32 offset; - U16 litLength; - U16 matchLength; -} seqDef; - -typedef struct { - seqDef* sequencesStart; - seqDef* sequences; - BYTE* litStart; - BYTE* lit; - BYTE* llCode; - BYTE* mlCode; - BYTE* ofCode; - size_t maxNbSeq; - size_t maxNbLit; - U32 longLengthID; /* 0 == no longLength; 1 == Lit.longLength; 2 == Match.longLength; */ - U32 longLengthPos; -} seqStore_t; - -typedef struct { - U32 litLength; - U32 matchLength; -} ZSTD_sequenceLength; - -/** - * Returns the ZSTD_sequenceLength for the given sequences. It handles the decoding of long sequences - * indicated by longLengthPos and longLengthID, and adds MINMATCH back to matchLength. - */ -MEM_STATIC ZSTD_sequenceLength ZSTD_getSequenceLength(seqStore_t const* seqStore, seqDef const* seq) -{ - ZSTD_sequenceLength seqLen; - seqLen.litLength = seq->litLength; - seqLen.matchLength = seq->matchLength + MINMATCH; - if (seqStore->longLengthPos == (U32)(seq - seqStore->sequencesStart)) { - if (seqStore->longLengthID == 1) { - seqLen.litLength += 0xFFFF; - } - if (seqStore->longLengthID == 2) { - seqLen.matchLength += 0xFFFF; - } - } - return seqLen; -} - -/** - * Contains the compressed frame size and an upper-bound for the decompressed frame size. - * Note: before using `compressedSize`, check for errors using ZSTD_isError(). - * similarly, before using `decompressedBound`, check for errors using: - * `decompressedBound != ZSTD_CONTENTSIZE_ERROR` - */ -typedef struct { - size_t compressedSize; - unsigned long long decompressedBound; -} ZSTD_frameSizeInfo; /* decompress & legacy */ - -const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx); /* compress & dictBuilder */ -void ZSTD_seqToCodes(const seqStore_t* seqStorePtr); /* compress, dictBuilder, decodeCorpus (shouldn't get its definition from here) */ - -/* custom memory allocation functions */ -void* ZSTD_malloc(size_t size, ZSTD_customMem customMem); -void* ZSTD_calloc(size_t size, ZSTD_customMem customMem); -void ZSTD_free(void* ptr, ZSTD_customMem customMem); - - -MEM_STATIC U32 ZSTD_highbit32(U32 val) /* compress, dictBuilder, decodeCorpus */ -{ - assert(val != 0); - { -# if defined(_MSC_VER) /* Visual */ - unsigned long r=0; - return _BitScanReverse(&r, val) ? (unsigned)r : 0; -# elif defined(__GNUC__) && (__GNUC__ >= 3) /* GCC Intrinsic */ - return __builtin_clz (val) ^ 31; -# elif defined(__ICCARM__) /* IAR Intrinsic */ - return 31 - __CLZ(val); -# else /* Software version */ - static const U32 DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 }; - U32 v = val; - v |= v >> 1; - v |= v >> 2; - v |= v >> 4; - v |= v >> 8; - v |= v >> 16; - return DeBruijnClz[(v * 0x07C4ACDDU) >> 27]; -# endif - } -} - - -/* ZSTD_invalidateRepCodes() : - * ensures next compression will not use repcodes from previous block. - * Note : only works with regular variant; - * do not use with extDict variant ! */ -void ZSTD_invalidateRepCodes(ZSTD_CCtx* cctx); /* zstdmt, adaptive_compression (shouldn't get this definition from here) */ - - -typedef struct { - blockType_e blockType; - U32 lastBlock; - U32 origSize; -} blockProperties_t; /* declared here for decompress and fullbench */ - -/*! ZSTD_getcBlockSize() : - * Provides the size of compressed block from block header `src` */ -/* Used by: decompress, fullbench (does not get its definition from here) */ -size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, - blockProperties_t* bpPtr); - -/*! ZSTD_decodeSeqHeaders() : - * decode sequence header from src */ -/* Used by: decompress, fullbench (does not get its definition from here) */ -size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr, - const void* src, size_t srcSize); - -} - -#endif /* ZSTD_CCOMMON_H_MODULE */ - - -// LICENSE_CHANGE_END - - - -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #4 -// See the end of this file for a list - -/* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. - */ - -#ifndef ZSTD_CWKSP_H -#define ZSTD_CWKSP_H - -/*-************************************* -* Dependencies -***************************************/ - - -/*-************************************* -* Constants -***************************************/ - -/* Since the workspace is effectively its own little malloc implementation / - * arena, when we run under ASAN, we should similarly insert redzones between - * each internal element of the workspace, so ASAN will catch overruns that - * reach outside an object but that stay inside the workspace. - * - * This defines the size of that redzone. - */ -#ifndef ZSTD_CWKSP_ASAN_REDZONE_SIZE -#define ZSTD_CWKSP_ASAN_REDZONE_SIZE 128 -#endif - -namespace duckdb_zstd { - -/*-************************************* -* Structures -***************************************/ -typedef enum { - ZSTD_cwksp_alloc_objects, - ZSTD_cwksp_alloc_buffers, - ZSTD_cwksp_alloc_aligned -} ZSTD_cwksp_alloc_phase_e; - -/** - * Zstd fits all its internal datastructures into a single continuous buffer, - * so that it only needs to perform a single OS allocation (or so that a buffer - * can be provided to it and it can perform no allocations at all). This buffer - * is called the workspace. - * - * Several optimizations complicate that process of allocating memory ranges - * from this workspace for each internal datastructure: - * - * - These different internal datastructures have different setup requirements: - * - * - The static objects need to be cleared once and can then be trivially - * reused for each compression. - * - * - Various buffers don't need to be initialized at all--they are always - * written into before they're read. - * - * - The matchstate tables have a unique requirement that they don't need - * their memory to be totally cleared, but they do need the memory to have - * some bound, i.e., a guarantee that all values in the memory they've been - * allocated is less than some maximum value (which is the starting value - * for the indices that they will then use for compression). When this - * guarantee is provided to them, they can use the memory without any setup - * work. When it can't, they have to clear the area. - * - * - These buffers also have different alignment requirements. - * - * - We would like to reuse the objects in the workspace for multiple - * compressions without having to perform any expensive reallocation or - * reinitialization work. - * - * - We would like to be able to efficiently reuse the workspace across - * multiple compressions **even when the compression parameters change** and - * we need to resize some of the objects (where possible). - * - * To attempt to manage this buffer, given these constraints, the ZSTD_cwksp - * abstraction was created. It works as follows: - * - * Workspace Layout: - * - * [ ... workspace ... ] - * [objects][tables ... ->] free space [<- ... aligned][<- ... buffers] - * - * The various objects that live in the workspace are divided into the - * following categories, and are allocated separately: - * - * - Static objects: this is optionally the enclosing ZSTD_CCtx or ZSTD_CDict, - * so that literally everything fits in a single buffer. Note: if present, - * this must be the first object in the workspace, since ZSTD_free{CCtx, - * CDict}() rely on a pointer comparison to see whether one or two frees are - * required. - * - * - Fixed size objects: these are fixed-size, fixed-count objects that are - * nonetheless "dynamically" allocated in the workspace so that we can - * control how they're initialized separately from the broader ZSTD_CCtx. - * Examples: - * - Entropy Workspace - * - 2 x ZSTD_compressedBlockState_t - * - CDict dictionary contents - * - * - Tables: these are any of several different datastructures (hash tables, - * chain tables, binary trees) that all respect a common format: they are - * uint32_t arrays, all of whose values are between 0 and (nextSrc - base). - * Their sizes depend on the cparams. - * - * - Aligned: these buffers are used for various purposes that require 4 byte - * alignment, but don't require any initialization before they're used. - * - * - Buffers: these buffers are used for various purposes that don't require - * any alignment or initialization before they're used. This means they can - * be moved around at no cost for a new compression. - * - * Allocating Memory: - * - * The various types of objects must be allocated in order, so they can be - * correctly packed into the workspace buffer. That order is: - * - * 1. Objects - * 2. Buffers - * 3. Aligned - * 4. Tables - * - * Attempts to reserve objects of different types out of order will fail. - */ -typedef struct { - void* workspace; - void* workspaceEnd; - - void* objectEnd; - void* tableEnd; - void* tableValidEnd; - void* allocStart; - - int allocFailed; - int workspaceOversizedDuration; - ZSTD_cwksp_alloc_phase_e phase; -} ZSTD_cwksp; - -/*-************************************* -* Functions -***************************************/ - -MEM_STATIC size_t ZSTD_cwksp_available_space(ZSTD_cwksp* ws); - -MEM_STATIC void ZSTD_cwksp_assert_internal_consistency(ZSTD_cwksp* ws) { - (void)ws; - assert(ws->workspace <= ws->objectEnd); - assert(ws->objectEnd <= ws->tableEnd); - assert(ws->objectEnd <= ws->tableValidEnd); - assert(ws->tableEnd <= ws->allocStart); - assert(ws->tableValidEnd <= ws->allocStart); - assert(ws->allocStart <= ws->workspaceEnd); -} - -/** - * Align must be a power of 2. - */ -MEM_STATIC size_t ZSTD_cwksp_align(size_t size, size_t const align) { - size_t const mask = align - 1; - assert((align & mask) == 0); - return (size + mask) & ~mask; -} - -/** - * Use this to determine how much space in the workspace we will consume to - * allocate this object. (Normally it should be exactly the size of the object, - * but under special conditions, like ASAN, where we pad each object, it might - * be larger.) - * - * Since tables aren't currently redzoned, you don't need to call through this - * to figure out how much space you need for the matchState tables. Everything - * else is though. - */ -MEM_STATIC size_t ZSTD_cwksp_alloc_size(size_t size) { -#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE) - return size + 2 * ZSTD_CWKSP_ASAN_REDZONE_SIZE; -#else - return size; -#endif -} - -MEM_STATIC void ZSTD_cwksp_internal_advance_phase( - ZSTD_cwksp* ws, ZSTD_cwksp_alloc_phase_e phase) { - assert(phase >= ws->phase); - if (phase > ws->phase) { - if (ws->phase < ZSTD_cwksp_alloc_buffers && - phase >= ZSTD_cwksp_alloc_buffers) { - ws->tableValidEnd = ws->objectEnd; - } - if (ws->phase < ZSTD_cwksp_alloc_aligned && - phase >= ZSTD_cwksp_alloc_aligned) { - /* If unaligned allocations down from a too-large top have left us - * unaligned, we need to realign our alloc ptr. Technically, this - * can consume space that is unaccounted for in the neededSpace - * calculation. However, I believe this can only happen when the - * workspace is too large, and specifically when it is too large - * by a larger margin than the space that will be consumed. */ - /* TODO: cleaner, compiler warning friendly way to do this??? */ - ws->allocStart = (BYTE*)ws->allocStart - ((size_t)ws->allocStart & (sizeof(U32)-1)); - if (ws->allocStart < ws->tableValidEnd) { - ws->tableValidEnd = ws->allocStart; - } - } - ws->phase = phase; - } -} - -/** - * Returns whether this object/buffer/etc was allocated in this workspace. - */ -MEM_STATIC int ZSTD_cwksp_owns_buffer(const ZSTD_cwksp* ws, const void* ptr) { - return (ptr != NULL) && (ws->workspace <= ptr) && (ptr <= ws->workspaceEnd); -} - -/** - * Internal function. Do not use directly. - */ -MEM_STATIC void* ZSTD_cwksp_reserve_internal( - ZSTD_cwksp* ws, size_t bytes, ZSTD_cwksp_alloc_phase_e phase) { - void* alloc; - void* bottom = ws->tableEnd; - ZSTD_cwksp_internal_advance_phase(ws, phase); - alloc = (BYTE *)ws->allocStart - bytes; - -#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE) - /* over-reserve space */ - alloc = (BYTE *)alloc - 2 * ZSTD_CWKSP_ASAN_REDZONE_SIZE; -#endif - - DEBUGLOG(5, "cwksp: reserving %p %zd bytes, %zd bytes remaining", - alloc, bytes, ZSTD_cwksp_available_space(ws) - bytes); - ZSTD_cwksp_assert_internal_consistency(ws); - assert(alloc >= bottom); - if (alloc < bottom) { - DEBUGLOG(4, "cwksp: alloc failed!"); - ws->allocFailed = 1; - return NULL; - } - if (alloc < ws->tableValidEnd) { - ws->tableValidEnd = alloc; - } - ws->allocStart = alloc; - -#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE) - /* Move alloc so there's ZSTD_CWKSP_ASAN_REDZONE_SIZE unused space on - * either size. */ - alloc = (BYTE *)alloc + ZSTD_CWKSP_ASAN_REDZONE_SIZE; - __asan_unpoison_memory_region(alloc, bytes); -#endif - - return alloc; -} - -/** - * Reserves and returns unaligned memory. - */ -MEM_STATIC BYTE* ZSTD_cwksp_reserve_buffer(ZSTD_cwksp* ws, size_t bytes) { - return (BYTE*)ZSTD_cwksp_reserve_internal(ws, bytes, ZSTD_cwksp_alloc_buffers); -} - -/** - * Reserves and returns memory sized on and aligned on sizeof(unsigned). - */ -MEM_STATIC void* ZSTD_cwksp_reserve_aligned(ZSTD_cwksp* ws, size_t bytes) { - assert((bytes & (sizeof(U32)-1)) == 0); - return ZSTD_cwksp_reserve_internal(ws, ZSTD_cwksp_align(bytes, sizeof(U32)), ZSTD_cwksp_alloc_aligned); -} - -/** - * Aligned on sizeof(unsigned). These buffers have the special property that - * their values remain constrained, allowing us to re-use them without - * memset()-ing them. - */ -MEM_STATIC void* ZSTD_cwksp_reserve_table(ZSTD_cwksp* ws, size_t bytes) { - const ZSTD_cwksp_alloc_phase_e phase = ZSTD_cwksp_alloc_aligned; - void* alloc = ws->tableEnd; - void* end = (BYTE *)alloc + bytes; - void* top = ws->allocStart; - - DEBUGLOG(5, "cwksp: reserving %p table %zd bytes, %zd bytes remaining", - alloc, bytes, ZSTD_cwksp_available_space(ws) - bytes); - assert((bytes & (sizeof(U32)-1)) == 0); - ZSTD_cwksp_internal_advance_phase(ws, phase); - ZSTD_cwksp_assert_internal_consistency(ws); - assert(end <= top); - if (end > top) { - DEBUGLOG(4, "cwksp: table alloc failed!"); - ws->allocFailed = 1; - return NULL; - } - ws->tableEnd = end; - -#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE) - __asan_unpoison_memory_region(alloc, bytes); -#endif - - return alloc; -} - -/** - * Aligned on sizeof(void*). - */ -MEM_STATIC void* ZSTD_cwksp_reserve_object(ZSTD_cwksp* ws, size_t bytes) { - size_t roundedBytes = ZSTD_cwksp_align(bytes, sizeof(void*)); - void* alloc = ws->objectEnd; - void* end = (BYTE*)alloc + roundedBytes; - -#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE) - /* over-reserve space */ - end = (BYTE *)end + 2 * ZSTD_CWKSP_ASAN_REDZONE_SIZE; -#endif - - DEBUGLOG(5, - "cwksp: reserving %p object %zd bytes (rounded to %zd), %zd bytes remaining", - alloc, bytes, roundedBytes, ZSTD_cwksp_available_space(ws) - roundedBytes); - assert(((size_t)alloc & (sizeof(void*)-1)) == 0); - assert((bytes & (sizeof(void*)-1)) == 0); - ZSTD_cwksp_assert_internal_consistency(ws); - /* we must be in the first phase, no advance is possible */ - if (ws->phase != ZSTD_cwksp_alloc_objects || end > ws->workspaceEnd) { - DEBUGLOG(4, "cwksp: object alloc failed!"); - ws->allocFailed = 1; - return NULL; - } - ws->objectEnd = end; - ws->tableEnd = end; - ws->tableValidEnd = end; - -#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE) - /* Move alloc so there's ZSTD_CWKSP_ASAN_REDZONE_SIZE unused space on - * either size. */ - alloc = (BYTE *)alloc + ZSTD_CWKSP_ASAN_REDZONE_SIZE; - __asan_unpoison_memory_region(alloc, bytes); -#endif - - return alloc; -} - -MEM_STATIC void ZSTD_cwksp_mark_tables_dirty(ZSTD_cwksp* ws) { - DEBUGLOG(4, "cwksp: ZSTD_cwksp_mark_tables_dirty"); - -#if defined (MEMORY_SANITIZER) && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE) - /* To validate that the table re-use logic is sound, and that we don't - * access table space that we haven't cleaned, we re-"poison" the table - * space every time we mark it dirty. */ - { - size_t size = (BYTE*)ws->tableValidEnd - (BYTE*)ws->objectEnd; - assert(__msan_test_shadow(ws->objectEnd, size) == -1); - __msan_poison(ws->objectEnd, size); - } -#endif - - assert(ws->tableValidEnd >= ws->objectEnd); - assert(ws->tableValidEnd <= ws->allocStart); - ws->tableValidEnd = ws->objectEnd; - ZSTD_cwksp_assert_internal_consistency(ws); -} - -MEM_STATIC void ZSTD_cwksp_mark_tables_clean(ZSTD_cwksp* ws) { - DEBUGLOG(4, "cwksp: ZSTD_cwksp_mark_tables_clean"); - assert(ws->tableValidEnd >= ws->objectEnd); - assert(ws->tableValidEnd <= ws->allocStart); - if (ws->tableValidEnd < ws->tableEnd) { - ws->tableValidEnd = ws->tableEnd; - } - ZSTD_cwksp_assert_internal_consistency(ws); -} - -/** - * Zero the part of the allocated tables not already marked clean. - */ -MEM_STATIC void ZSTD_cwksp_clean_tables(ZSTD_cwksp* ws) { - DEBUGLOG(4, "cwksp: ZSTD_cwksp_clean_tables"); - assert(ws->tableValidEnd >= ws->objectEnd); - assert(ws->tableValidEnd <= ws->allocStart); - if (ws->tableValidEnd < ws->tableEnd) { - memset(ws->tableValidEnd, 0, (BYTE*)ws->tableEnd - (BYTE*)ws->tableValidEnd); - } - ZSTD_cwksp_mark_tables_clean(ws); -} - -/** - * Invalidates table allocations. - * All other allocations remain valid. - */ -MEM_STATIC void ZSTD_cwksp_clear_tables(ZSTD_cwksp* ws) { - DEBUGLOG(4, "cwksp: clearing tables!"); - -#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE) - { - size_t size = (BYTE*)ws->tableValidEnd - (BYTE*)ws->objectEnd; - __asan_poison_memory_region(ws->objectEnd, size); - } -#endif - - ws->tableEnd = ws->objectEnd; - ZSTD_cwksp_assert_internal_consistency(ws); -} - -/** - * Invalidates all buffer, aligned, and table allocations. - * Object allocations remain valid. - */ -MEM_STATIC void ZSTD_cwksp_clear(ZSTD_cwksp* ws) { - DEBUGLOG(4, "cwksp: clearing!"); - -#if defined (MEMORY_SANITIZER) && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE) - /* To validate that the context re-use logic is sound, and that we don't - * access stuff that this compression hasn't initialized, we re-"poison" - * the workspace (or at least the non-static, non-table parts of it) - * every time we start a new compression. */ - { - size_t size = (BYTE*)ws->workspaceEnd - (BYTE*)ws->tableValidEnd; - __msan_poison(ws->tableValidEnd, size); - } -#endif - -#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE) - { - size_t size = (BYTE*)ws->workspaceEnd - (BYTE*)ws->objectEnd; - __asan_poison_memory_region(ws->objectEnd, size); - } -#endif - - ws->tableEnd = ws->objectEnd; - ws->allocStart = ws->workspaceEnd; - ws->allocFailed = 0; - if (ws->phase > ZSTD_cwksp_alloc_buffers) { - ws->phase = ZSTD_cwksp_alloc_buffers; - } - ZSTD_cwksp_assert_internal_consistency(ws); -} - -/** - * The provided workspace takes ownership of the buffer [start, start+size). - * Any existing values in the workspace are ignored (the previously managed - * buffer, if present, must be separately freed). - */ -MEM_STATIC void ZSTD_cwksp_init(ZSTD_cwksp* ws, void* start, size_t size) { - DEBUGLOG(4, "cwksp: init'ing workspace with %zd bytes", size); - assert(((size_t)start & (sizeof(void*)-1)) == 0); /* ensure correct alignment */ - ws->workspace = start; - ws->workspaceEnd = (BYTE*)start + size; - ws->objectEnd = ws->workspace; - ws->tableValidEnd = ws->objectEnd; - ws->phase = ZSTD_cwksp_alloc_objects; - ZSTD_cwksp_clear(ws); - ws->workspaceOversizedDuration = 0; - ZSTD_cwksp_assert_internal_consistency(ws); -} - -MEM_STATIC size_t ZSTD_cwksp_create(ZSTD_cwksp* ws, size_t size, ZSTD_customMem customMem) { - void* workspace = ZSTD_malloc(size, customMem); - DEBUGLOG(4, "cwksp: creating new workspace with %zd bytes", size); - RETURN_ERROR_IF(workspace == NULL, memory_allocation, "NULL pointer!"); - ZSTD_cwksp_init(ws, workspace, size); - return 0; -} - -MEM_STATIC void ZSTD_cwksp_free(ZSTD_cwksp* ws, ZSTD_customMem customMem) { - void *ptr = ws->workspace; - DEBUGLOG(4, "cwksp: freeing workspace"); - memset(ws, 0, sizeof(ZSTD_cwksp)); - ZSTD_free(ptr, customMem); -} - -/** - * Moves the management of a workspace from one cwksp to another. The src cwksp - * is left in an invalid state (src must be re-init()'ed before its used again). - */ -MEM_STATIC void ZSTD_cwksp_move(ZSTD_cwksp* dst, ZSTD_cwksp* src) { - *dst = *src; - memset(src, 0, sizeof(ZSTD_cwksp)); -} - -MEM_STATIC size_t ZSTD_cwksp_sizeof(const ZSTD_cwksp* ws) { - return (size_t)((BYTE*)ws->workspaceEnd - (BYTE*)ws->workspace); -} - -MEM_STATIC int ZSTD_cwksp_reserve_failed(const ZSTD_cwksp* ws) { - return ws->allocFailed; -} - -/*-************************************* -* Functions Checking Free Space -***************************************/ - -MEM_STATIC size_t ZSTD_cwksp_available_space(ZSTD_cwksp* ws) { - return (size_t)((BYTE*)ws->allocStart - (BYTE*)ws->tableEnd); -} - -MEM_STATIC int ZSTD_cwksp_check_available(ZSTD_cwksp* ws, size_t additionalNeededSpace) { - return ZSTD_cwksp_available_space(ws) >= additionalNeededSpace; -} - -MEM_STATIC int ZSTD_cwksp_check_too_large(ZSTD_cwksp* ws, size_t additionalNeededSpace) { - return ZSTD_cwksp_check_available( - ws, additionalNeededSpace * ZSTD_WORKSPACETOOLARGE_FACTOR); -} - -MEM_STATIC int ZSTD_cwksp_check_wasteful(ZSTD_cwksp* ws, size_t additionalNeededSpace) { - return ZSTD_cwksp_check_too_large(ws, additionalNeededSpace) - && ws->workspaceOversizedDuration > ZSTD_WORKSPACETOOLARGE_MAXDURATION; -} - -MEM_STATIC void ZSTD_cwksp_bump_oversized_duration( - ZSTD_cwksp* ws, size_t additionalNeededSpace) { - if (ZSTD_cwksp_check_too_large(ws, additionalNeededSpace)) { - ws->workspaceOversizedDuration++; - } else { - ws->workspaceOversizedDuration = 0; - } -} - -} - -#endif /* ZSTD_CWKSP_H */ - - -// LICENSE_CHANGE_END - -// #ifdef ZSTD_MULTITHREAD -// # include "zstdmt_compress.h" -// #endif - -/*-************************************* -* Constants -***************************************/ -#define kSearchStrength 8 -#define HASH_READ_SIZE 8 -#define ZSTD_DUBT_UNSORTED_MARK 1 /* For btlazy2 strategy, index ZSTD_DUBT_UNSORTED_MARK==1 means "unsorted". - It could be confused for a real successor at index "1", if sorted as larger than its predecessor. - It's not a big deal though : candidate will just be sorted again. - Additionally, candidate position 1 will be lost. - But candidate 1 cannot hide a large tree of candidates, so it's a minimal loss. - The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be mishandled after table re-use with a different strategy. - This constant is required by ZSTD_compressBlock_btlazy2() and ZSTD_reduceTable_internal() */ - - -namespace duckdb_zstd { -/*-************************************* -* Context memory management -***************************************/ -typedef enum { ZSTDcs_created=0, ZSTDcs_init, ZSTDcs_ongoing, ZSTDcs_ending } ZSTD_compressionStage_e; -typedef enum { zcss_init=0, zcss_load, zcss_flush } ZSTD_cStreamStage; - -typedef struct ZSTD_prefixDict_s { - const void* dict; - size_t dictSize; - ZSTD_dictContentType_e dictContentType; -} ZSTD_prefixDict; - -typedef struct { - void* dictBuffer; - void const* dict; - size_t dictSize; - ZSTD_dictContentType_e dictContentType; - ZSTD_CDict* cdict; -} ZSTD_localDict; - -typedef struct { - U32 CTable[HUF_CTABLE_SIZE_U32(255)]; - HUF_repeat repeatMode; -} ZSTD_hufCTables_t; - -typedef struct { - FSE_CTable offcodeCTable[FSE_CTABLE_SIZE_U32(OffFSELog, MaxOff)]; - FSE_CTable matchlengthCTable[FSE_CTABLE_SIZE_U32(MLFSELog, MaxML)]; - FSE_CTable litlengthCTable[FSE_CTABLE_SIZE_U32(LLFSELog, MaxLL)]; - FSE_repeat offcode_repeatMode; - FSE_repeat matchlength_repeatMode; - FSE_repeat litlength_repeatMode; -} ZSTD_fseCTables_t; - -typedef struct { - ZSTD_hufCTables_t huf; - ZSTD_fseCTables_t fse; -} ZSTD_entropyCTables_t; - -typedef struct { - U32 off; - U32 len; -} ZSTD_match_t; - -typedef struct { - int price; - U32 off; - U32 mlen; - U32 litlen; - U32 rep[ZSTD_REP_NUM]; -} ZSTD_optimal_t; - -typedef enum { zop_dynamic=0, zop_predef } ZSTD_OptPrice_e; - -typedef struct { - /* All tables are allocated inside cctx->workspace by ZSTD_resetCCtx_internal() */ - unsigned* litFreq; /* table of literals statistics, of size 256 */ - unsigned* litLengthFreq; /* table of litLength statistics, of size (MaxLL+1) */ - unsigned* matchLengthFreq; /* table of matchLength statistics, of size (MaxML+1) */ - unsigned* offCodeFreq; /* table of offCode statistics, of size (MaxOff+1) */ - ZSTD_match_t* matchTable; /* list of found matches, of size ZSTD_OPT_NUM+1 */ - ZSTD_optimal_t* priceTable; /* All positions tracked by optimal parser, of size ZSTD_OPT_NUM+1 */ - - U32 litSum; /* nb of literals */ - U32 litLengthSum; /* nb of litLength codes */ - U32 matchLengthSum; /* nb of matchLength codes */ - U32 offCodeSum; /* nb of offset codes */ - U32 litSumBasePrice; /* to compare to log2(litfreq) */ - U32 litLengthSumBasePrice; /* to compare to log2(llfreq) */ - U32 matchLengthSumBasePrice;/* to compare to log2(mlfreq) */ - U32 offCodeSumBasePrice; /* to compare to log2(offreq) */ - ZSTD_OptPrice_e priceType; /* prices can be determined dynamically, or follow a pre-defined cost structure */ - const ZSTD_entropyCTables_t* symbolCosts; /* pre-calculated dictionary statistics */ - ZSTD_literalCompressionMode_e literalCompressionMode; -} optState_t; - -typedef struct { - ZSTD_entropyCTables_t entropy; - U32 rep[ZSTD_REP_NUM]; -} ZSTD_compressedBlockState_t; - -typedef struct { - BYTE const* nextSrc; /* next block here to continue on current prefix */ - BYTE const* base; /* All regular indexes relative to this position */ - BYTE const* dictBase; /* extDict indexes relative to this position */ - U32 dictLimit; /* below that point, need extDict */ - U32 lowLimit; /* below that point, no more valid data */ -} ZSTD_window_t; - -typedef struct ZSTD_matchState_t ZSTD_matchState_t; -struct ZSTD_matchState_t { - ZSTD_window_t window; /* State for window round buffer management */ - U32 loadedDictEnd; /* index of end of dictionary, within context's referential. - * When loadedDictEnd != 0, a dictionary is in use, and still valid. - * This relies on a mechanism to set loadedDictEnd=0 when dictionary is no longer within distance. - * Such mechanism is provided within ZSTD_window_enforceMaxDist() and ZSTD_checkDictValidity(). - * When dict referential is copied into active context (i.e. not attached), - * loadedDictEnd == dictSize, since referential starts from zero. - */ - U32 nextToUpdate; /* index from which to continue table update */ - U32 hashLog3; /* dispatch table for matches of len==3 : larger == faster, more memory */ - U32* hashTable; - U32* hashTable3; - U32* chainTable; - optState_t opt; /* optimal parser state */ - const ZSTD_matchState_t* dictMatchState; - ZSTD_compressionParameters cParams; -}; - -typedef struct { - ZSTD_compressedBlockState_t* prevCBlock; - ZSTD_compressedBlockState_t* nextCBlock; - ZSTD_matchState_t matchState; -} ZSTD_blockState_t; - -typedef struct { - U32 offset; - U32 checksum; -} ldmEntry_t; - -typedef struct { - ZSTD_window_t window; /* State for the window round buffer management */ - ldmEntry_t* hashTable; - U32 loadedDictEnd; - BYTE* bucketOffsets; /* Next position in bucket to insert entry */ - U64 hashPower; /* Used to compute the rolling hash. - * Depends on ldmParams.minMatchLength */ -} ldmState_t; - -typedef struct { - U32 enableLdm; /* 1 if enable long distance matching */ - U32 hashLog; /* Log size of hashTable */ - U32 bucketSizeLog; /* Log bucket size for collision resolution, at most 8 */ - U32 minMatchLength; /* Minimum match length */ - U32 hashRateLog; /* Log number of entries to skip */ - U32 windowLog; /* Window log for the LDM */ -} ldmParams_t; - -typedef struct { - U32 offset; - U32 litLength; - U32 matchLength; -} rawSeq; - -typedef struct { - rawSeq* seq; /* The start of the sequences */ - size_t pos; /* The position where reading stopped. <= size. */ - size_t size; /* The number of sequences. <= capacity. */ - size_t capacity; /* The capacity starting from `seq` pointer */ -} rawSeqStore_t; - -typedef struct { - int collectSequences; - ZSTD_Sequence* seqStart; - size_t seqIndex; - size_t maxSequences; -} SeqCollector; - -struct ZSTD_CCtx_params_s { - ZSTD_format_e format; - ZSTD_compressionParameters cParams; - ZSTD_frameParameters fParams; - - int compressionLevel; - int forceWindow; /* force back-references to respect limit of - * 1< 63) ? ZSTD_highbit32(litLength) + LL_deltaCode : LL_Code[litLength]; -} - -/* ZSTD_MLcode() : - * note : mlBase = matchLength - MINMATCH; - * because it's the format it's stored in seqStore->sequences */ -MEM_STATIC U32 ZSTD_MLcode(U32 mlBase) -{ - static const BYTE ML_Code[128] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, - 32, 32, 33, 33, 34, 34, 35, 35, 36, 36, 36, 36, 37, 37, 37, 37, - 38, 38, 38, 38, 38, 38, 38, 38, 39, 39, 39, 39, 39, 39, 39, 39, - 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, - 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42 }; - static const U32 ML_deltaCode = 36; - return (mlBase > 127) ? ZSTD_highbit32(mlBase) + ML_deltaCode : ML_Code[mlBase]; -} - -typedef struct repcodes_s { - U32 rep[3]; -} repcodes_t; - -MEM_STATIC repcodes_t ZSTD_updateRep(U32 const rep[3], U32 const offset, U32 const ll0) -{ - repcodes_t newReps; - if (offset >= ZSTD_REP_NUM) { /* full offset */ - newReps.rep[2] = rep[1]; - newReps.rep[1] = rep[0]; - newReps.rep[0] = offset - ZSTD_REP_MOVE; - } else { /* repcode */ - U32 const repCode = offset + ll0; - if (repCode > 0) { /* note : if repCode==0, no change */ - U32 const currentOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode]; - newReps.rep[2] = (repCode >= 2) ? rep[1] : rep[2]; - newReps.rep[1] = rep[0]; - newReps.rep[0] = currentOffset; - } else { /* repCode == 0 */ - memcpy(&newReps, rep, sizeof(newReps)); - } - } - return newReps; -} - -/* ZSTD_cParam_withinBounds: - * @return 1 if value is within cParam bounds, - * 0 otherwise */ -MEM_STATIC int ZSTD_cParam_withinBounds(ZSTD_cParameter cParam, int value) -{ - ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam); - if (ZSTD_isError(bounds.error)) return 0; - if (value < bounds.lowerBound) return 0; - if (value > bounds.upperBound) return 0; - return 1; -} - -/* ZSTD_noCompressBlock() : - * Writes uncompressed block to dst buffer from given src. - * Returns the size of the block */ -MEM_STATIC size_t ZSTD_noCompressBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize, U32 lastBlock) -{ - U32 const cBlockHeader24 = lastBlock + (((U32)bt_raw)<<1) + (U32)(srcSize << 3); - RETURN_ERROR_IF(srcSize + ZSTDInternalConstants::ZSTD_blockHeaderSize > dstCapacity, - dstSize_tooSmall, "dst buf too small for uncompressed block"); - MEM_writeLE24(dst, cBlockHeader24); - memcpy((BYTE*)dst + ZSTDInternalConstants::ZSTD_blockHeaderSize, src, srcSize); - return ZSTDInternalConstants::ZSTD_blockHeaderSize + srcSize; -} - -MEM_STATIC size_t ZSTD_rleCompressBlock (void* dst, size_t dstCapacity, BYTE src, size_t srcSize, U32 lastBlock) -{ - BYTE* const op = (BYTE*)dst; - U32 const cBlockHeader = lastBlock + (((U32)bt_rle)<<1) + (U32)(srcSize << 3); - RETURN_ERROR_IF(dstCapacity < 4, dstSize_tooSmall, ""); - MEM_writeLE24(op, cBlockHeader); - op[3] = src; - return 4; -} - - -/* ZSTD_minGain() : - * minimum compression required - * to generate a compress block or a compressed literals section. - * note : use same formula for both situations */ -MEM_STATIC size_t ZSTD_minGain(size_t srcSize, ZSTD_strategy strat) -{ - U32 const minlog = (strat>=ZSTD_btultra) ? (U32)(strat) - 1 : 6; - ZSTD_STATIC_ASSERT(ZSTD_btultra == 8); - assert(ZSTD_cParam_withinBounds(ZSTD_c_strategy, strat)); - return (srcSize >> minlog) + 2; -} - -MEM_STATIC int ZSTD_disableLiteralsCompression(const ZSTD_CCtx_params* cctxParams) -{ - switch (cctxParams->literalCompressionMode) { - case ZSTD_lcm_huffman: - return 0; - case ZSTD_lcm_uncompressed: - return 1; - default: - assert(0 /* impossible: pre-validated */); - /* fall-through */ - case ZSTD_lcm_auto: - return (cctxParams->cParams.strategy == ZSTD_fast) && (cctxParams->cParams.targetLength > 0); - } -} - -/*! ZSTD_safecopyLiterals() : - * memcpy() function that won't read beyond more than WILDCOPY_OVERLENGTH bytes past ilimit_w. - * Only called when the sequence ends past ilimit_w, so it only needs to be optimized for single - * large copies. - */ -static void ZSTD_safecopyLiterals(BYTE* op, BYTE const* ip, BYTE const* const iend, BYTE const* ilimit_w) { - assert(iend > ilimit_w); - if (ip <= ilimit_w) { - ZSTD_wildcopy(op, ip, ilimit_w - ip, ZSTD_no_overlap); - op += ilimit_w - ip; - ip = ilimit_w; - } - while (ip < iend) *op++ = *ip++; -} - -/*! ZSTD_storeSeq() : - * Store a sequence (litlen, litPtr, offCode and mlBase) into seqStore_t. - * `offCode` : distance to match + ZSTD_REP_MOVE (values <= ZSTD_REP_MOVE are repCodes). - * `mlBase` : matchLength - MINMATCH - * Allowed to overread literals up to litLimit. -*/ -HINT_INLINE UNUSED_ATTR -void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const BYTE* literals, const BYTE* litLimit, U32 offCode, size_t mlBase) -{ - BYTE const* const litLimit_w = litLimit - WILDCOPY_OVERLENGTH; - BYTE const* const litEnd = literals + litLength; -#if defined(DEBUGLEVEL) && (DEBUGLEVEL >= 6) - static const BYTE* g_start = NULL; - if (g_start==NULL) g_start = (const BYTE*)literals; /* note : index only works for compression within a single segment */ - { U32 const pos = (U32)((const BYTE*)literals - g_start); - DEBUGLOG(6, "Cpos%7u :%3u literals, match%4u bytes at offCode%7u", - pos, (U32)litLength, (U32)mlBase+MINMATCH, (U32)offCode); - } -#endif - assert((size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart) < seqStorePtr->maxNbSeq); - /* copy Literals */ - assert(seqStorePtr->maxNbLit <= 128 KB); - assert(seqStorePtr->lit + litLength <= seqStorePtr->litStart + seqStorePtr->maxNbLit); - assert(literals + litLength <= litLimit); - if (litEnd <= litLimit_w) { - /* Common case we can use wildcopy. - * First copy 16 bytes, because literals are likely short. - */ - assert(WILDCOPY_OVERLENGTH >= 16); - ZSTD_copy16(seqStorePtr->lit, literals); - if (litLength > 16) { - ZSTD_wildcopy(seqStorePtr->lit+16, literals+16, (ptrdiff_t)litLength-16, ZSTD_no_overlap); - } - } else { - ZSTD_safecopyLiterals(seqStorePtr->lit, literals, litEnd, litLimit_w); - } - seqStorePtr->lit += litLength; - - /* literal Length */ - if (litLength>0xFFFF) { - assert(seqStorePtr->longLengthID == 0); /* there can only be a single long length */ - seqStorePtr->longLengthID = 1; - seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart); - } - seqStorePtr->sequences[0].litLength = (U16)litLength; - - /* match offset */ - seqStorePtr->sequences[0].offset = offCode + 1; - - /* match Length */ - if (mlBase>0xFFFF) { - assert(seqStorePtr->longLengthID == 0); /* there can only be a single long length */ - seqStorePtr->longLengthID = 2; - seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart); - } - seqStorePtr->sequences[0].matchLength = (U16)mlBase; - - seqStorePtr->sequences++; -} - - -/*-************************************* -* Match length counter -***************************************/ -static unsigned ZSTD_NbCommonBytes (size_t val) -{ - if (MEM_isLittleEndian()) { - if (MEM_64bits()) { -# if defined(_MSC_VER) && defined(_WIN64) - unsigned long r = 0; - return _BitScanForward64( &r, (U64)val ) ? (unsigned)(r >> 3) : 0; -# elif defined(__GNUC__) && (__GNUC__ >= 4) - return (__builtin_ctzll((U64)val) >> 3); -# else - static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, - 0, 3, 1, 3, 1, 4, 2, 7, - 0, 2, 3, 6, 1, 5, 3, 5, - 1, 3, 4, 4, 2, 5, 6, 7, - 7, 0, 1, 2, 3, 3, 4, 6, - 2, 6, 5, 5, 3, 4, 5, 6, - 7, 1, 2, 4, 6, 4, 4, 5, - 7, 2, 6, 5, 7, 6, 7, 7 }; - return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58]; -# endif - } else { /* 32 bits */ -# if defined(_MSC_VER) - unsigned long r=0; - return _BitScanForward( &r, (U32)val ) ? (unsigned)(r >> 3) : 0; -# elif defined(__GNUC__) && (__GNUC__ >= 3) - return (__builtin_ctz((U32)val) >> 3); -# else - static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, - 3, 2, 2, 1, 3, 2, 0, 1, - 3, 3, 1, 2, 2, 2, 2, 0, - 3, 1, 2, 0, 1, 0, 1, 1 }; - return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27]; -# endif - } - } else { /* Big Endian CPU */ - if (MEM_64bits()) { -# if defined(_MSC_VER) && defined(_WIN64) - unsigned long r = 0; - return _BitScanReverse64( &r, val ) ? (unsigned)(r >> 3) : 0; -# elif defined(__GNUC__) && (__GNUC__ >= 4) - return (__builtin_clzll(val) >> 3); -# else - unsigned r; - const unsigned n32 = sizeof(size_t)*4; /* calculate this way due to compiler complaining in 32-bits mode */ - if (!(val>>n32)) { r=4; } else { r=0; val>>=n32; } - if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; } - r += (!val); - return r; -# endif - } else { /* 32 bits */ -# if defined(_MSC_VER) - unsigned long r = 0; - return _BitScanReverse( &r, (unsigned long)val ) ? (unsigned)(r >> 3) : 0; -# elif defined(__GNUC__) && (__GNUC__ >= 3) - return (__builtin_clz((U32)val) >> 3); -# else - unsigned r; - if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; } - r += (!val); - return r; -# endif - } } -} - - -MEM_STATIC size_t ZSTD_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* const pInLimit) -{ - const BYTE* const pStart = pIn; - const BYTE* const pInLoopLimit = pInLimit - (sizeof(size_t)-1); - - if (pIn < pInLoopLimit) { - { size_t const diff = MEM_readST(pMatch) ^ MEM_readST(pIn); - if (diff) return ZSTD_NbCommonBytes(diff); } - pIn+=sizeof(size_t); pMatch+=sizeof(size_t); - while (pIn < pInLoopLimit) { - size_t const diff = MEM_readST(pMatch) ^ MEM_readST(pIn); - if (!diff) { pIn+=sizeof(size_t); pMatch+=sizeof(size_t); continue; } - pIn += ZSTD_NbCommonBytes(diff); - return (size_t)(pIn - pStart); - } } - if (MEM_64bits() && (pIn<(pInLimit-3)) && (MEM_read32(pMatch) == MEM_read32(pIn))) { pIn+=4; pMatch+=4; } - if ((pIn<(pInLimit-1)) && (MEM_read16(pMatch) == MEM_read16(pIn))) { pIn+=2; pMatch+=2; } - if ((pIn> (32-h) ; } -MEM_STATIC size_t ZSTD_hash3Ptr(const void* ptr, U32 h) { return ZSTD_hash3(MEM_readLE32(ptr), h); } /* only in zstd_opt.h */ - -static const U32 prime4bytes = 2654435761U; -static U32 ZSTD_hash4(U32 u, U32 h) { return (u * prime4bytes) >> (32-h) ; } -static size_t ZSTD_hash4Ptr(const void* ptr, U32 h) { return ZSTD_hash4(MEM_read32(ptr), h); } - -static const U64 prime5bytes = 889523592379ULL; -static size_t ZSTD_hash5(U64 u, U32 h) { return (size_t)(((u << (64-40)) * prime5bytes) >> (64-h)) ; } -static size_t ZSTD_hash5Ptr(const void* p, U32 h) { return ZSTD_hash5(MEM_readLE64(p), h); } - -static const U64 prime6bytes = 227718039650203ULL; -static size_t ZSTD_hash6(U64 u, U32 h) { return (size_t)(((u << (64-48)) * prime6bytes) >> (64-h)) ; } -static size_t ZSTD_hash6Ptr(const void* p, U32 h) { return ZSTD_hash6(MEM_readLE64(p), h); } - -static const U64 prime7bytes = 58295818150454627ULL; -static size_t ZSTD_hash7(U64 u, U32 h) { return (size_t)(((u << (64-56)) * prime7bytes) >> (64-h)) ; } -static size_t ZSTD_hash7Ptr(const void* p, U32 h) { return ZSTD_hash7(MEM_readLE64(p), h); } - -static const U64 prime8bytes = 0xCF1BBCDCB7A56463ULL; -static size_t ZSTD_hash8(U64 u, U32 h) { return (size_t)(((u) * prime8bytes) >> (64-h)) ; } -static size_t ZSTD_hash8Ptr(const void* p, U32 h) { return ZSTD_hash8(MEM_readLE64(p), h); } - -MEM_STATIC size_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls) -{ - switch(mls) - { - default: - case 4: return ZSTD_hash4Ptr(p, hBits); - case 5: return ZSTD_hash5Ptr(p, hBits); - case 6: return ZSTD_hash6Ptr(p, hBits); - case 7: return ZSTD_hash7Ptr(p, hBits); - case 8: return ZSTD_hash8Ptr(p, hBits); - } -} - -/** ZSTD_ipow() : - * Return base^exponent. - */ -static U64 ZSTD_ipow(U64 base, U64 exponent) -{ - U64 power = 1; - while (exponent) { - if (exponent & 1) power *= base; - exponent >>= 1; - base *= base; - } - return power; -} - -#define ZSTD_ROLL_HASH_CHAR_OFFSET 10 - -/** ZSTD_rollingHash_append() : - * Add the buffer to the hash value. - */ -static U64 ZSTD_rollingHash_append(U64 hash, void const* buf, size_t size) -{ - BYTE const* istart = (BYTE const*)buf; - size_t pos; - for (pos = 0; pos < size; ++pos) { - hash *= prime8bytes; - hash += istart[pos] + ZSTD_ROLL_HASH_CHAR_OFFSET; - } - return hash; -} - -/** ZSTD_rollingHash_compute() : - * Compute the rolling hash value of the buffer. - */ -MEM_STATIC U64 ZSTD_rollingHash_compute(void const* buf, size_t size) -{ - return ZSTD_rollingHash_append(0, buf, size); -} - -/** ZSTD_rollingHash_primePower() : - * Compute the primePower to be passed to ZSTD_rollingHash_rotate() for a hash - * over a window of length bytes. - */ -MEM_STATIC U64 ZSTD_rollingHash_primePower(U32 length) -{ - return ZSTD_ipow(prime8bytes, length - 1); -} - -/** ZSTD_rollingHash_rotate() : - * Rotate the rolling hash by one byte. - */ -MEM_STATIC U64 ZSTD_rollingHash_rotate(U64 hash, BYTE toRemove, BYTE toAdd, U64 primePower) -{ - hash -= (toRemove + ZSTD_ROLL_HASH_CHAR_OFFSET) * primePower; - hash *= prime8bytes; - hash += toAdd + ZSTD_ROLL_HASH_CHAR_OFFSET; - return hash; -} - -/*-************************************* -* Round buffer management -***************************************/ -#if (ZSTD_WINDOWLOG_MAX_64 > 31) -# error "ZSTD_WINDOWLOG_MAX is too large : would overflow ZSTD_CURRENT_MAX" -#endif -/* Max current allowed */ -#define ZSTD_CURRENT_MAX ((3U << 29) + (1U << ZSTD_WINDOWLOG_MAX)) -/* Maximum chunk size before overflow correction needs to be called again */ -#define ZSTD_CHUNKSIZE_MAX \ - ( ((U32)-1) /* Maximum ending current index */ \ - - ZSTD_CURRENT_MAX) /* Maximum beginning lowLimit */ - -/** - * ZSTD_window_clear(): - * Clears the window containing the history by simply setting it to empty. - */ -MEM_STATIC void ZSTD_window_clear(ZSTD_window_t* window) -{ - size_t const endT = (size_t)(window->nextSrc - window->base); - U32 const end = (U32)endT; - - window->lowLimit = end; - window->dictLimit = end; -} - -/** - * ZSTD_window_hasExtDict(): - * Returns non-zero if the window has a non-empty extDict. - */ -MEM_STATIC U32 ZSTD_window_hasExtDict(ZSTD_window_t const window) -{ - return window.lowLimit < window.dictLimit; -} - -/** - * ZSTD_matchState_dictMode(): - * Inspects the provided matchState and figures out what dictMode should be - * passed to the compressor. - */ -MEM_STATIC ZSTD_dictMode_e ZSTD_matchState_dictMode(const ZSTD_matchState_t *ms) -{ - return ZSTD_window_hasExtDict(ms->window) ? - ZSTD_extDict : - ms->dictMatchState != NULL ? - ZSTD_dictMatchState : - ZSTD_noDict; -} - -/** - * ZSTD_window_needOverflowCorrection(): - * Returns non-zero if the indices are getting too large and need overflow - * protection. - */ -MEM_STATIC U32 ZSTD_window_needOverflowCorrection(ZSTD_window_t const window, - void const* srcEnd) -{ - U32 const current = (U32)((BYTE const*)srcEnd - window.base); - return current > ZSTD_CURRENT_MAX; -} - -/** - * ZSTD_window_correctOverflow(): - * Reduces the indices to protect from index overflow. - * Returns the correction made to the indices, which must be applied to every - * stored index. - * - * The least significant cycleLog bits of the indices must remain the same, - * which may be 0. Every index up to maxDist in the past must be valid. - * NOTE: (maxDist & cycleMask) must be zero. - */ -MEM_STATIC U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog, - U32 maxDist, void const* src) -{ - /* preemptive overflow correction: - * 1. correction is large enough: - * lowLimit > (3<<29) ==> current > 3<<29 + 1< (3<<29 + 1< (3<<29) - (1< (3<<29) - (1<<30) (NOTE: chainLog <= 30) - * > 1<<29 - * - * 2. (ip+ZSTD_CHUNKSIZE_MAX - cctx->base) doesn't overflow: - * After correction, current is less than (1<base < 1<<32. - * 3. (cctx->lowLimit + 1< 3<<29 + 1<base); - U32 const currentCycle0 = current & cycleMask; - /* Exclude zero so that newCurrent - maxDist >= 1. */ - U32 const currentCycle1 = currentCycle0 == 0 ? (1U << cycleLog) : currentCycle0; - U32 const newCurrent = currentCycle1 + maxDist; - U32 const correction = current - newCurrent; - assert((maxDist & cycleMask) == 0); - assert(current > newCurrent); - /* Loose bound, should be around 1<<29 (see above) */ - assert(correction > 1<<28); - - window->base += correction; - window->dictBase += correction; - if (window->lowLimit <= correction) window->lowLimit = 1; - else window->lowLimit -= correction; - if (window->dictLimit <= correction) window->dictLimit = 1; - else window->dictLimit -= correction; - - /* Ensure we can still reference the full window. */ - assert(newCurrent >= maxDist); - assert(newCurrent - maxDist >= 1); - /* Ensure that lowLimit and dictLimit didn't underflow. */ - assert(window->lowLimit <= newCurrent); - assert(window->dictLimit <= newCurrent); - - DEBUGLOG(4, "Correction of 0x%x bytes to lowLimit=0x%x", correction, - window->lowLimit); - return correction; -} - -/** - * ZSTD_window_enforceMaxDist(): - * Updates lowLimit so that: - * (srcEnd - base) - lowLimit == maxDist + loadedDictEnd - * - * It ensures index is valid as long as index >= lowLimit. - * This must be called before a block compression call. - * - * loadedDictEnd is only defined if a dictionary is in use for current compression. - * As the name implies, loadedDictEnd represents the index at end of dictionary. - * The value lies within context's referential, it can be directly compared to blockEndIdx. - * - * If loadedDictEndPtr is NULL, no dictionary is in use, and we use loadedDictEnd == 0. - * If loadedDictEndPtr is not NULL, we set it to zero after updating lowLimit. - * This is because dictionaries are allowed to be referenced fully - * as long as the last byte of the dictionary is in the window. - * Once input has progressed beyond window size, dictionary cannot be referenced anymore. - * - * In normal dict mode, the dictionary lies between lowLimit and dictLimit. - * In dictMatchState mode, lowLimit and dictLimit are the same, - * and the dictionary is below them. - * forceWindow and dictMatchState are therefore incompatible. - */ -MEM_STATIC void -ZSTD_window_enforceMaxDist(ZSTD_window_t* window, - const void* blockEnd, - U32 maxDist, - U32* loadedDictEndPtr, - const ZSTD_matchState_t** dictMatchStatePtr) -{ - U32 const blockEndIdx = (U32)((BYTE const*)blockEnd - window->base); - U32 const loadedDictEnd = (loadedDictEndPtr != NULL) ? *loadedDictEndPtr : 0; - DEBUGLOG(5, "ZSTD_window_enforceMaxDist: blockEndIdx=%u, maxDist=%u, loadedDictEnd=%u", - (unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd); - - /* - When there is no dictionary : loadedDictEnd == 0. - In which case, the test (blockEndIdx > maxDist) is merely to avoid - overflowing next operation `newLowLimit = blockEndIdx - maxDist`. - - When there is a standard dictionary : - Index referential is copied from the dictionary, - which means it starts from 0. - In which case, loadedDictEnd == dictSize, - and it makes sense to compare `blockEndIdx > maxDist + dictSize` - since `blockEndIdx` also starts from zero. - - When there is an attached dictionary : - loadedDictEnd is expressed within the referential of the context, - so it can be directly compared against blockEndIdx. - */ - if (blockEndIdx > maxDist + loadedDictEnd) { - U32 const newLowLimit = blockEndIdx - maxDist; - if (window->lowLimit < newLowLimit) window->lowLimit = newLowLimit; - if (window->dictLimit < window->lowLimit) { - DEBUGLOG(5, "Update dictLimit to match lowLimit, from %u to %u", - (unsigned)window->dictLimit, (unsigned)window->lowLimit); - window->dictLimit = window->lowLimit; - } - /* On reaching window size, dictionaries are invalidated */ - if (loadedDictEndPtr) *loadedDictEndPtr = 0; - if (dictMatchStatePtr) *dictMatchStatePtr = NULL; - } -} - -/* Similar to ZSTD_window_enforceMaxDist(), - * but only invalidates dictionary - * when input progresses beyond window size. - * assumption : loadedDictEndPtr and dictMatchStatePtr are valid (non NULL) - * loadedDictEnd uses same referential as window->base - * maxDist is the window size */ -MEM_STATIC void -ZSTD_checkDictValidity(const ZSTD_window_t* window, - const void* blockEnd, - U32 maxDist, - U32* loadedDictEndPtr, - const ZSTD_matchState_t** dictMatchStatePtr) -{ - assert(loadedDictEndPtr != NULL); - assert(dictMatchStatePtr != NULL); - { U32 const blockEndIdx = (U32)((BYTE const*)blockEnd - window->base); - U32 const loadedDictEnd = *loadedDictEndPtr; - DEBUGLOG(5, "ZSTD_checkDictValidity: blockEndIdx=%u, maxDist=%u, loadedDictEnd=%u", - (unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd); - assert(blockEndIdx >= loadedDictEnd); - - if (blockEndIdx > loadedDictEnd + maxDist) { - /* On reaching window size, dictionaries are invalidated. - * For simplification, if window size is reached anywhere within next block, - * the dictionary is invalidated for the full block. - */ - DEBUGLOG(6, "invalidating dictionary for current block (distance > windowSize)"); - *loadedDictEndPtr = 0; - *dictMatchStatePtr = NULL; - } else { - if (*loadedDictEndPtr != 0) { - DEBUGLOG(6, "dictionary considered valid for current block"); - } } } -} - -MEM_STATIC void ZSTD_window_init(ZSTD_window_t* window) { - memset(window, 0, sizeof(*window)); - window->base = (BYTE const*)""; - window->dictBase = (BYTE const*)""; - window->dictLimit = 1; /* start from 1, so that 1st position is valid */ - window->lowLimit = 1; /* it ensures first and later CCtx usages compress the same */ - window->nextSrc = window->base + 1; /* see issue #1241 */ -} - -/** - * ZSTD_window_update(): - * Updates the window by appending [src, src + srcSize) to the window. - * If it is not contiguous, the current prefix becomes the extDict, and we - * forget about the extDict. Handles overlap of the prefix and extDict. - * Returns non-zero if the segment is contiguous. - */ -MEM_STATIC U32 ZSTD_window_update(ZSTD_window_t* window, - void const* src, size_t srcSize) -{ - BYTE const* const ip = (BYTE const*)src; - U32 contiguous = 1; - DEBUGLOG(5, "ZSTD_window_update"); - if (srcSize == 0) - return contiguous; - assert(window->base != NULL); - assert(window->dictBase != NULL); - /* Check if blocks follow each other */ - if (src != window->nextSrc) { - /* not contiguous */ - size_t const distanceFromBase = (size_t)(window->nextSrc - window->base); - DEBUGLOG(5, "Non contiguous blocks, new segment starts at %u", window->dictLimit); - window->lowLimit = window->dictLimit; - assert(distanceFromBase == (size_t)(U32)distanceFromBase); /* should never overflow */ - window->dictLimit = (U32)distanceFromBase; - window->dictBase = window->base; - window->base = ip - distanceFromBase; - /* ms->nextToUpdate = window->dictLimit; */ - if (window->dictLimit - window->lowLimit < HASH_READ_SIZE) window->lowLimit = window->dictLimit; /* too small extDict */ - contiguous = 0; - } - window->nextSrc = ip + srcSize; - /* if input and dictionary overlap : reduce dictionary (area presumed modified by input) */ - if ( (ip+srcSize > window->dictBase + window->lowLimit) - & (ip < window->dictBase + window->dictLimit)) { - ptrdiff_t const highInputIdx = (ip + srcSize) - window->dictBase; - U32 const lowLimitMax = (highInputIdx > (ptrdiff_t)window->dictLimit) ? window->dictLimit : (U32)highInputIdx; - window->lowLimit = lowLimitMax; - DEBUGLOG(5, "Overlapping extDict and input : new lowLimit = %u", window->lowLimit); - } - return contiguous; -} - -/** - * Returns the lowest allowed match index. It may either be in the ext-dict or the prefix. - */ -MEM_STATIC U32 ZSTD_getLowestMatchIndex(const ZSTD_matchState_t* ms, U32 current, unsigned windowLog) -{ - U32 const maxDistance = 1U << windowLog; - U32 const lowestValid = ms->window.lowLimit; - U32 const withinWindow = (current - lowestValid > maxDistance) ? current - maxDistance : lowestValid; - U32 const isDictionary = (ms->loadedDictEnd != 0); - U32 const matchLowest = isDictionary ? lowestValid : withinWindow; - return matchLowest; -} - -/** - * Returns the lowest allowed match index in the prefix. - */ -MEM_STATIC U32 ZSTD_getLowestPrefixIndex(const ZSTD_matchState_t* ms, U32 current, unsigned windowLog) -{ - U32 const maxDistance = 1U << windowLog; - U32 const lowestValid = ms->window.dictLimit; - U32 const withinWindow = (current - lowestValid > maxDistance) ? current - maxDistance : lowestValid; - U32 const isDictionary = (ms->loadedDictEnd != 0); - U32 const matchLowest = isDictionary ? lowestValid : withinWindow; - return matchLowest; -} - - - -/* debug functions */ -#if (DEBUGLEVEL>=2) - -MEM_STATIC double ZSTD_fWeight(U32 rawStat) -{ - U32 const fp_accuracy = 8; - U32 const fp_multiplier = (1 << fp_accuracy); - U32 const newStat = rawStat + 1; - U32 const hb = ZSTD_highbit32(newStat); - U32 const BWeight = hb * fp_multiplier; - U32 const FWeight = (newStat << fp_accuracy) >> hb; - U32 const weight = BWeight + FWeight; - assert(hb + fp_accuracy < 31); - return (double)weight / fp_multiplier; -} - -/* display a table content, - * listing each element, its frequency, and its predicted bit cost */ -MEM_STATIC void ZSTD_debugTable(const U32* table, U32 max) -{ - unsigned u, sum; - for (u=0, sum=0; u<=max; u++) sum += table[u]; - DEBUGLOG(2, "total nb elts: %u", sum); - for (u=0; u<=max; u++) { - DEBUGLOG(2, "%2u: %5u (%.2f)", - u, table[u], ZSTD_fWeight(sum) - ZSTD_fWeight(table[u]) ); - } -} - -#endif - -/* =============================================================== - * Shared internal declarations - * These prototypes may be called from sources not in lib/compress - * =============================================================== */ - -/* ZSTD_loadCEntropy() : - * dict : must point at beginning of a valid zstd dictionary. - * return : size of dictionary header (size of magic number + dict ID + entropy tables) - * assumptions : magic number supposed already checked - * and dictSize >= 8 */ -size_t ZSTD_loadCEntropy(ZSTD_compressedBlockState_t* bs, void* workspace, - short* offcodeNCount, unsigned* offcodeMaxValue, - const void* const dict, size_t dictSize); - -void ZSTD_reset_compressedBlockState(ZSTD_compressedBlockState_t* bs); - -/* ============================================================== - * Private declarations - * These prototypes shall only be called from within lib/compress - * ============================================================== */ - -/* ZSTD_getCParamsFromCCtxParams() : - * cParams are built depending on compressionLevel, src size hints, - * LDM and manually set compression parameters. - * Note: srcSizeHint == 0 means 0! - */ -ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams( - const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize); - -/*! ZSTD_initCStream_internal() : - * Private use only. Init streaming operation. - * expects params to be valid. - * must receive dict, or cdict, or none, but not both. - * @return : 0, or an error code */ -size_t ZSTD_initCStream_internal(ZSTD_CStream* zcs, - const void* dict, size_t dictSize, - const ZSTD_CDict* cdict, - const ZSTD_CCtx_params* params, unsigned long long pledgedSrcSize); - -void ZSTD_resetSeqStore(seqStore_t* ssPtr); - -/*! ZSTD_getCParamsFromCDict() : - * as the name implies */ -ZSTD_compressionParameters ZSTD_getCParamsFromCDict(const ZSTD_CDict* cdict); - -/* ZSTD_compressBegin_advanced_internal() : - * Private use only. To be called from zstdmt_compress.c. */ -size_t ZSTD_compressBegin_advanced_internal(ZSTD_CCtx* cctx, - const void* dict, size_t dictSize, - ZSTD_dictContentType_e dictContentType, - ZSTD_dictTableLoadMethod_e dtlm, - const ZSTD_CDict* cdict, - const ZSTD_CCtx_params* params, - unsigned long long pledgedSrcSize); - -/* ZSTD_compress_advanced_internal() : - * Private use only. To be called from zstdmt_compress.c. */ -size_t ZSTD_compress_advanced_internal(ZSTD_CCtx* cctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - const void* dict,size_t dictSize, - const ZSTD_CCtx_params* params); - - -/* ZSTD_writeLastEmptyBlock() : - * output an empty Block with end-of-frame mark to complete a frame - * @return : size of data written into `dst` (== ZSTD_blockHeaderSize (defined in zstd_internal.h)) - * or an error code if `dstCapacity` is too small ( 1 */ -U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat); - -} - -#endif /* ZSTD_COMPRESS_H */ - - -// LICENSE_CHANGE_END - - - -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #4 -// See the end of this file for a list - -/* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. - */ - -#ifndef ZSTD_COMPRESS_SEQUENCES_H -#define ZSTD_COMPRESS_SEQUENCES_H - - /* FSE_repeat, FSE_CTable */ - /* symbolEncodingType_e, ZSTD_strategy */ - -namespace duckdb_zstd { - -typedef enum { - ZSTD_defaultDisallowed = 0, - ZSTD_defaultAllowed = 1 -} ZSTD_defaultPolicy_e; - -symbolEncodingType_e -ZSTD_selectEncodingType( - FSE_repeat* repeatMode, unsigned const* count, unsigned const max, - size_t const mostFrequent, size_t nbSeq, unsigned const FSELog, - FSE_CTable const* prevCTable, - short const* defaultNorm, U32 defaultNormLog, - ZSTD_defaultPolicy_e const isDefaultAllowed, - ZSTD_strategy const strategy); - -size_t -ZSTD_buildCTable(void* dst, size_t dstCapacity, - FSE_CTable* nextCTable, U32 FSELog, symbolEncodingType_e type, - unsigned* count, U32 max, - const BYTE* codeTable, size_t nbSeq, - const S16* defaultNorm, U32 defaultNormLog, U32 defaultMax, - const FSE_CTable* prevCTable, size_t prevCTableSize, - void* entropyWorkspace, size_t entropyWorkspaceSize); - -size_t ZSTD_encodeSequences( - void* dst, size_t dstCapacity, - FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable, - FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable, - FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable, - seqDef const* sequences, size_t nbSeq, int longOffsets, int bmi2); - -size_t ZSTD_fseBitCost( - FSE_CTable const* ctable, - unsigned const* count, - unsigned const max); - -size_t ZSTD_crossEntropyCost(short const* norm, unsigned accuracyLog, - unsigned const* count, unsigned const max); - -} - -#endif /* ZSTD_COMPRESS_SEQUENCES_H */ - - -// LICENSE_CHANGE_END - - - -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #4 -// See the end of this file for a list - -/* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. - */ - -#ifndef ZSTD_COMPRESS_LITERALS_H -#define ZSTD_COMPRESS_LITERALS_H - - /* ZSTD_hufCTables_t, ZSTD_minGain() */ - -namespace duckdb_zstd { - -size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize); - -size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize); - -size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf, - ZSTD_hufCTables_t* nextHuf, - ZSTD_strategy strategy, int disableLiteralCompression, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - void* entropyWorkspace, size_t entropyWorkspaceSize, - const int bmi2); - -} - -#endif /* ZSTD_COMPRESS_LITERALS_H */ - - -// LICENSE_CHANGE_END - - - -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #4 -// See the end of this file for a list - -/* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. - */ - -#ifndef ZSTD_FAST_H -#define ZSTD_FAST_H - - /* U32 */ - - -namespace duckdb_zstd { - -void ZSTD_fillHashTable(ZSTD_matchState_t* ms, - void const* end, ZSTD_dictTableLoadMethod_e dtlm); -size_t ZSTD_compressBlock_fast( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); -size_t ZSTD_compressBlock_fast_dictMatchState( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); -size_t ZSTD_compressBlock_fast_extDict( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); - -} - -#endif /* ZSTD_FAST_H */ - - -// LICENSE_CHANGE_END - - - -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #4 -// See the end of this file for a list - -/* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. - */ - -#ifndef ZSTD_DOUBLE_FAST_H -#define ZSTD_DOUBLE_FAST_H - - /* U32 */ - /* ZSTD_CCtx, size_t */ - -namespace duckdb_zstd { - -void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms, - void const* end, ZSTD_dictTableLoadMethod_e dtlm); -size_t ZSTD_compressBlock_doubleFast( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); -size_t ZSTD_compressBlock_doubleFast_dictMatchState( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); -size_t ZSTD_compressBlock_doubleFast_extDict( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); - -} - -#endif /* ZSTD_DOUBLE_FAST_H */ - - -// LICENSE_CHANGE_END - - - -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #4 -// See the end of this file for a list - -/* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. - */ - -#ifndef ZSTD_LAZY_H -#define ZSTD_LAZY_H - - - -namespace duckdb_zstd { - -U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip); - -void ZSTD_preserveUnsortedMark (U32* const table, U32 const size, U32 const reducerValue); /*! used in ZSTD_reduceIndex(). preemptively increase value of ZSTD_DUBT_UNSORTED_MARK */ - -size_t ZSTD_compressBlock_btlazy2( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); -size_t ZSTD_compressBlock_lazy2( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); -size_t ZSTD_compressBlock_lazy( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); -size_t ZSTD_compressBlock_greedy( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); - -size_t ZSTD_compressBlock_btlazy2_dictMatchState( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); -size_t ZSTD_compressBlock_lazy2_dictMatchState( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); -size_t ZSTD_compressBlock_lazy_dictMatchState( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); -size_t ZSTD_compressBlock_greedy_dictMatchState( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); - -size_t ZSTD_compressBlock_greedy_extDict( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); -size_t ZSTD_compressBlock_lazy_extDict( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); -size_t ZSTD_compressBlock_lazy2_extDict( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); -size_t ZSTD_compressBlock_btlazy2_extDict( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); - -} - -#endif /* ZSTD_LAZY_H */ - - -// LICENSE_CHANGE_END - - - -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #4 -// See the end of this file for a list - -/* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. - */ - -#ifndef ZSTD_OPT_H -#define ZSTD_OPT_H - - - -namespace duckdb_zstd { - -/* used in ZSTD_loadDictionaryContent() */ -void ZSTD_updateTree(ZSTD_matchState_t* ms, const BYTE* ip, const BYTE* iend); - -size_t ZSTD_compressBlock_btopt( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); -size_t ZSTD_compressBlock_btultra( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); -size_t ZSTD_compressBlock_btultra2( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); - - -size_t ZSTD_compressBlock_btopt_dictMatchState( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); -size_t ZSTD_compressBlock_btultra_dictMatchState( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); - -size_t ZSTD_compressBlock_btopt_extDict( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); -size_t ZSTD_compressBlock_btultra_extDict( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); - - /* note : no btultra2 variant for extDict nor dictMatchState, - * because btultra2 is not meant to work with dictionaries - * and is only specific for the first block (no prefix) */ - -} - -#endif /* ZSTD_OPT_H */ - - -// LICENSE_CHANGE_END - - - -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #4 -// See the end of this file for a list - -/* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. - */ - -#ifndef ZSTD_LDM_H -#define ZSTD_LDM_H - - /* ldmParams_t, U32 */ - /* ZSTD_CCtx, size_t */ - -/*-************************************* -* Long distance matching -***************************************/ - -#define ZSTD_LDM_DEFAULT_WINDOW_LOG ZSTD_WINDOWLOG_LIMIT_DEFAULT - -namespace duckdb_zstd { - -void ZSTD_ldm_fillHashTable( - ldmState_t* state, const BYTE* ip, - const BYTE* iend, ldmParams_t const* params); - -/** - * ZSTD_ldm_generateSequences(): - * - * Generates the sequences using the long distance match finder. - * Generates long range matching sequences in `sequences`, which parse a prefix - * of the source. `sequences` must be large enough to store every sequence, - * which can be checked with `ZSTD_ldm_getMaxNbSeq()`. - * @returns 0 or an error code. - * - * NOTE: The user must have called ZSTD_window_update() for all of the input - * they have, even if they pass it to ZSTD_ldm_generateSequences() in chunks. - * NOTE: This function returns an error if it runs out of space to store - * sequences. - */ -size_t ZSTD_ldm_generateSequences( - ldmState_t* ldms, rawSeqStore_t* sequences, - ldmParams_t const* params, void const* src, size_t srcSize); - -/** - * ZSTD_ldm_blockCompress(): - * - * Compresses a block using the predefined sequences, along with a secondary - * block compressor. The literals section of every sequence is passed to the - * secondary block compressor, and those sequences are interspersed with the - * predefined sequences. Returns the length of the last literals. - * Updates `rawSeqStore.pos` to indicate how many sequences have been consumed. - * `rawSeqStore.seq` may also be updated to split the last sequence between two - * blocks. - * @return The length of the last literals. - * - * NOTE: The source must be at most the maximum block size, but the predefined - * sequences can be any size, and may be longer than the block. In the case that - * they are longer than the block, the last sequences may need to be split into - * two. We handle that case correctly, and update `rawSeqStore` appropriately. - * NOTE: This function does not return any errors. - */ -size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore, - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); - -/** - * ZSTD_ldm_skipSequences(): - * - * Skip past `srcSize` bytes worth of sequences in `rawSeqStore`. - * Avoids emitting matches less than `minMatch` bytes. - * Must be called for data with is not passed to ZSTD_ldm_blockCompress(). - */ -void ZSTD_ldm_skipSequences(rawSeqStore_t* rawSeqStore, size_t srcSize, - U32 const minMatch); - - -/** ZSTD_ldm_getTableSize() : - * Estimate the space needed for long distance matching tables or 0 if LDM is - * disabled. - */ -size_t ZSTD_ldm_getTableSize(ldmParams_t params); - -/** ZSTD_ldm_getSeqSpace() : - * Return an upper bound on the number of sequences that can be produced by - * the long distance matcher, or 0 if LDM is disabled. - */ -size_t ZSTD_ldm_getMaxNbSeq(ldmParams_t params, size_t maxChunkSize); - -/** ZSTD_ldm_adjustParameters() : - * If the params->hashRateLog is not set, set it to its default value based on - * windowLog and params->hashLog. - * - * Ensures that params->bucketSizeLog is <= params->hashLog (setting it to - * params->hashLog if it is not). - * - * Ensures that the minMatchLength >= targetLength during optimal parsing. - */ -void ZSTD_ldm_adjustParameters(ldmParams_t* params, - ZSTD_compressionParameters const* cParams); - -} - -#endif /* ZSTD_FAST_H */ - - -// LICENSE_CHANGE_END - - - -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #4 -// See the end of this file for a list - -/* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. - */ - -#ifndef ZSTD_COMPRESS_ADVANCED_H -#define ZSTD_COMPRESS_ADVANCED_H - -/*-************************************* -* Dependencies -***************************************/ - - /* ZSTD_CCtx */ - -namespace duckdb_zstd { -/*-************************************* -* Target Compressed Block Size -***************************************/ - -/* ZSTD_compressSuperBlock() : - * Used to compress a super block when targetCBlockSize is being used. - * The given block will be compressed into multiple sub blocks that are around targetCBlockSize. */ -size_t ZSTD_compressSuperBlock(ZSTD_CCtx* zc, - void* dst, size_t dstCapacity, - void const* src, size_t srcSize, - unsigned lastBlock); -} - - -#endif /* ZSTD_COMPRESS_ADVANCED_H */ - - -// LICENSE_CHANGE_END - - - -namespace duckdb_zstd { -/*-************************************* -* Helper functions -***************************************/ -/* ZSTD_compressBound() - * Note that the result from this function is only compatible with the "normal" - * full-block strategy. - * When there are a lot of small blocks due to frequent flush in streaming mode - * the overhead of headers can make the compressed data to be larger than the - * return value of ZSTD_compressBound(). - */ -size_t ZSTD_compressBound(size_t srcSize) { - return ZSTD_COMPRESSBOUND(srcSize); -} - - -/*-************************************* -* Context memory management -***************************************/ -struct ZSTD_CDict_s { - const void* dictContent; - size_t dictContentSize; - U32* entropyWorkspace; /* entropy workspace of HUF_WORKSPACE_SIZE bytes */ - ZSTD_cwksp workspace; - ZSTD_matchState_t matchState; - ZSTD_compressedBlockState_t cBlockState; - ZSTD_customMem customMem; - U32 dictID; - int compressionLevel; /* 0 indicates that advanced API was used to select CDict params */ -}; /* typedef'd to ZSTD_CDict within "zstd.h" */ - -ZSTD_CCtx* ZSTD_createCCtx(void) -{ - return ZSTD_createCCtx_advanced({NULL, NULL, NULL}); -} - -static void ZSTD_initCCtx(ZSTD_CCtx* cctx, ZSTD_customMem memManager) -{ - assert(cctx != NULL); - memset(cctx, 0, sizeof(*cctx)); - cctx->customMem = memManager; - cctx->bmi2 = 0; - { size_t const err = ZSTD_CCtx_reset(cctx, ZSTD_reset_parameters); - assert(!ZSTD_isError(err)); - (void)err; - } -} - -ZSTD_CCtx* ZSTD_createCCtx_advanced(ZSTD_customMem customMem) -{ - ZSTD_STATIC_ASSERT(zcss_init==0); - ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_UNKNOWN==(0ULL - 1)); - if (!customMem.customAlloc ^ !customMem.customFree) return NULL; - { ZSTD_CCtx* const cctx = (ZSTD_CCtx*)ZSTD_malloc(sizeof(ZSTD_CCtx), customMem); - if (!cctx) return NULL; - ZSTD_initCCtx(cctx, customMem); - return cctx; - } -} - -ZSTD_CCtx* ZSTD_initStaticCCtx(void* workspace, size_t workspaceSize) -{ - ZSTD_cwksp ws; - ZSTD_CCtx* cctx; - if (workspaceSize <= sizeof(ZSTD_CCtx)) return NULL; /* minimum size */ - if ((size_t)workspace & 7) return NULL; /* must be 8-aligned */ - ZSTD_cwksp_init(&ws, workspace, workspaceSize); - - cctx = (ZSTD_CCtx*)ZSTD_cwksp_reserve_object(&ws, sizeof(ZSTD_CCtx)); - if (cctx == NULL) return NULL; - - memset(cctx, 0, sizeof(ZSTD_CCtx)); - ZSTD_cwksp_move(&cctx->workspace, &ws); - cctx->staticSize = workspaceSize; - - /* statically sized space. entropyWorkspace never moves (but prev/next block swap places) */ - if (!ZSTD_cwksp_check_available(&cctx->workspace, HUF_WORKSPACE_SIZE + 2 * sizeof(ZSTD_compressedBlockState_t))) return NULL; - cctx->blockState.prevCBlock = (ZSTD_compressedBlockState_t*)ZSTD_cwksp_reserve_object(&cctx->workspace, sizeof(ZSTD_compressedBlockState_t)); - cctx->blockState.nextCBlock = (ZSTD_compressedBlockState_t*)ZSTD_cwksp_reserve_object(&cctx->workspace, sizeof(ZSTD_compressedBlockState_t)); - cctx->entropyWorkspace = (U32*)ZSTD_cwksp_reserve_object(&cctx->workspace, HUF_WORKSPACE_SIZE); - cctx->bmi2 = 0; - return cctx; -} - -/** - * Clears and frees all of the dictionaries in the CCtx. - */ -static void ZSTD_clearAllDicts(ZSTD_CCtx* cctx) -{ - ZSTD_free(cctx->localDict.dictBuffer, cctx->customMem); - ZSTD_freeCDict(cctx->localDict.cdict); - memset(&cctx->localDict, 0, sizeof(cctx->localDict)); - memset(&cctx->prefixDict, 0, sizeof(cctx->prefixDict)); - cctx->cdict = NULL; -} - -static size_t ZSTD_sizeof_localDict(ZSTD_localDict dict) -{ - size_t const bufferSize = dict.dictBuffer != NULL ? dict.dictSize : 0; - size_t const cdictSize = ZSTD_sizeof_CDict(dict.cdict); - return bufferSize + cdictSize; -} - -static void ZSTD_freeCCtxContent(ZSTD_CCtx* cctx) -{ - assert(cctx != NULL); - assert(cctx->staticSize == 0); - ZSTD_clearAllDicts(cctx); -#ifdef ZSTD_MULTITHREAD - ZSTDMT_freeCCtx(cctx->mtctx); cctx->mtctx = NULL; -#endif - ZSTD_cwksp_free(&cctx->workspace, cctx->customMem); -} - -size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx) -{ - if (cctx==NULL) return 0; /* support free on NULL */ - RETURN_ERROR_IF(cctx->staticSize, memory_allocation, - "not compatible with static CCtx"); - { - int cctxInWorkspace = ZSTD_cwksp_owns_buffer(&cctx->workspace, cctx); - ZSTD_freeCCtxContent(cctx); - if (!cctxInWorkspace) { - ZSTD_free(cctx, cctx->customMem); - } - } - return 0; -} - - -static size_t ZSTD_sizeof_mtctx(const ZSTD_CCtx* cctx) -{ -#ifdef ZSTD_MULTITHREAD - return ZSTDMT_sizeof_CCtx(cctx->mtctx); -#else - (void)cctx; - return 0; -#endif -} - - -size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx) -{ - if (cctx==NULL) return 0; /* support sizeof on NULL */ - /* cctx may be in the workspace */ - return (cctx->workspace.workspace == cctx ? 0 : sizeof(*cctx)) - + ZSTD_cwksp_sizeof(&cctx->workspace) - + ZSTD_sizeof_localDict(cctx->localDict) - + ZSTD_sizeof_mtctx(cctx); -} - -size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs) -{ - return ZSTD_sizeof_CCtx(zcs); /* same object */ -} - -/* private API call, for dictBuilder only */ -const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx) { return &(ctx->seqStore); } - -static ZSTD_CCtx_params ZSTD_makeCCtxParamsFromCParams( - ZSTD_compressionParameters cParams) -{ - ZSTD_CCtx_params cctxParams; - memset(&cctxParams, 0, sizeof(cctxParams)); - cctxParams.cParams = cParams; - cctxParams.compressionLevel = ZSTD_CLEVEL_DEFAULT; /* should not matter, as all cParams are presumed properly defined */ - assert(!ZSTD_checkCParams(cParams)); - cctxParams.fParams.contentSizeFlag = 1; - return cctxParams; -} - -static ZSTD_CCtx_params* ZSTD_createCCtxParams_advanced( - ZSTD_customMem customMem) -{ - ZSTD_CCtx_params* params; - if (!customMem.customAlloc ^ !customMem.customFree) return NULL; - params = (ZSTD_CCtx_params*)ZSTD_calloc( - sizeof(ZSTD_CCtx_params), customMem); - if (!params) { return NULL; } - params->customMem = customMem; - params->compressionLevel = ZSTD_CLEVEL_DEFAULT; - params->fParams.contentSizeFlag = 1; - return params; -} - -ZSTD_CCtx_params* ZSTD_createCCtxParams(void) -{ - return ZSTD_createCCtxParams_advanced(ZSTDInternalConstants::ZSTD_defaultCMem); -} - -size_t ZSTD_freeCCtxParams(ZSTD_CCtx_params* params) -{ - if (params == NULL) { return 0; } - ZSTD_free(params, params->customMem); - return 0; -} - -size_t ZSTD_CCtxParams_reset(ZSTD_CCtx_params* params) -{ - return ZSTD_CCtxParams_init(params, ZSTD_CLEVEL_DEFAULT); -} - -size_t ZSTD_CCtxParams_init(ZSTD_CCtx_params* cctxParams, int compressionLevel) { - RETURN_ERROR_IF(!cctxParams, GENERIC, "NULL pointer!"); - memset(cctxParams, 0, sizeof(*cctxParams)); - cctxParams->compressionLevel = compressionLevel; - cctxParams->fParams.contentSizeFlag = 1; - return 0; -} - -size_t ZSTD_CCtxParams_init_advanced(ZSTD_CCtx_params* cctxParams, ZSTD_parameters params) -{ - RETURN_ERROR_IF(!cctxParams, GENERIC, "NULL pointer!"); - FORWARD_IF_ERROR( ZSTD_checkCParams(params.cParams) , ""); - memset(cctxParams, 0, sizeof(*cctxParams)); - assert(!ZSTD_checkCParams(params.cParams)); - cctxParams->cParams = params.cParams; - cctxParams->fParams = params.fParams; - cctxParams->compressionLevel = ZSTD_CLEVEL_DEFAULT; /* should not matter, as all cParams are presumed properly defined */ - return 0; -} - -/* ZSTD_assignParamsToCCtxParams() : - * params is presumed valid at this stage */ -static ZSTD_CCtx_params ZSTD_assignParamsToCCtxParams( - const ZSTD_CCtx_params* cctxParams, const ZSTD_parameters* params) -{ - ZSTD_CCtx_params ret = *cctxParams; - assert(!ZSTD_checkCParams(params->cParams)); - ret.cParams = params->cParams; - ret.fParams = params->fParams; - ret.compressionLevel = ZSTD_CLEVEL_DEFAULT; /* should not matter, as all cParams are presumed properly defined */ - return ret; -} - -ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter param) -{ - ZSTD_bounds bounds = { 0, 0, 0 }; - - switch(param) - { - case ZSTD_c_compressionLevel: - bounds.lowerBound = ZSTD_minCLevel(); - bounds.upperBound = ZSTD_maxCLevel(); - return bounds; - - case ZSTD_c_windowLog: - bounds.lowerBound = ZSTD_WINDOWLOG_MIN; - bounds.upperBound = ZSTD_WINDOWLOG_MAX; - return bounds; - - case ZSTD_c_hashLog: - bounds.lowerBound = ZSTD_HASHLOG_MIN; - bounds.upperBound = ZSTD_HASHLOG_MAX; - return bounds; - - case ZSTD_c_chainLog: - bounds.lowerBound = ZSTD_CHAINLOG_MIN; - bounds.upperBound = ZSTD_CHAINLOG_MAX; - return bounds; - - case ZSTD_c_searchLog: - bounds.lowerBound = ZSTD_SEARCHLOG_MIN; - bounds.upperBound = ZSTD_SEARCHLOG_MAX; - return bounds; - - case ZSTD_c_minMatch: - bounds.lowerBound = ZSTD_MINMATCH_MIN; - bounds.upperBound = ZSTD_MINMATCH_MAX; - return bounds; - - case ZSTD_c_targetLength: - bounds.lowerBound = ZSTD_TARGETLENGTH_MIN; - bounds.upperBound = ZSTD_TARGETLENGTH_MAX; - return bounds; - - case ZSTD_c_strategy: - bounds.lowerBound = ZSTD_STRATEGY_MIN; - bounds.upperBound = ZSTD_STRATEGY_MAX; - return bounds; - - case ZSTD_c_contentSizeFlag: - bounds.lowerBound = 0; - bounds.upperBound = 1; - return bounds; - - case ZSTD_c_checksumFlag: - bounds.lowerBound = 0; - bounds.upperBound = 1; - return bounds; - - case ZSTD_c_dictIDFlag: - bounds.lowerBound = 0; - bounds.upperBound = 1; - return bounds; - - case ZSTD_c_nbWorkers: - bounds.lowerBound = 0; -#ifdef ZSTD_MULTITHREAD - bounds.upperBound = ZSTDMT_NBWORKERS_MAX; -#else - bounds.upperBound = 0; -#endif - return bounds; - - case ZSTD_c_jobSize: - bounds.lowerBound = 0; -#ifdef ZSTD_MULTITHREAD - bounds.upperBound = ZSTDMT_JOBSIZE_MAX; -#else - bounds.upperBound = 0; -#endif - return bounds; - - case ZSTD_c_overlapLog: -#ifdef ZSTD_MULTITHREAD - bounds.lowerBound = ZSTD_OVERLAPLOG_MIN; - bounds.upperBound = ZSTD_OVERLAPLOG_MAX; -#else - bounds.lowerBound = 0; - bounds.upperBound = 0; -#endif - return bounds; - - case ZSTD_c_enableLongDistanceMatching: - bounds.lowerBound = 0; - bounds.upperBound = 1; - return bounds; - - case ZSTD_c_ldmHashLog: - bounds.lowerBound = ZSTD_LDM_HASHLOG_MIN; - bounds.upperBound = ZSTD_LDM_HASHLOG_MAX; - return bounds; - - case ZSTD_c_ldmMinMatch: - bounds.lowerBound = ZSTD_LDM_MINMATCH_MIN; - bounds.upperBound = ZSTD_LDM_MINMATCH_MAX; - return bounds; - - case ZSTD_c_ldmBucketSizeLog: - bounds.lowerBound = ZSTD_LDM_BUCKETSIZELOG_MIN; - bounds.upperBound = ZSTD_LDM_BUCKETSIZELOG_MAX; - return bounds; - - case ZSTD_c_ldmHashRateLog: - bounds.lowerBound = ZSTD_LDM_HASHRATELOG_MIN; - bounds.upperBound = ZSTD_LDM_HASHRATELOG_MAX; - return bounds; - - /* experimental parameters */ - case ZSTD_c_rsyncable: - bounds.lowerBound = 0; - bounds.upperBound = 1; - return bounds; - - case ZSTD_c_forceMaxWindow : - bounds.lowerBound = 0; - bounds.upperBound = 1; - return bounds; - - case ZSTD_c_format: - ZSTD_STATIC_ASSERT(ZSTD_f_zstd1 < ZSTD_f_zstd1_magicless); - bounds.lowerBound = ZSTD_f_zstd1; - bounds.upperBound = ZSTD_f_zstd1_magicless; /* note : how to ensure at compile time that this is the highest value enum ? */ - return bounds; - - case ZSTD_c_forceAttachDict: - ZSTD_STATIC_ASSERT(ZSTD_dictDefaultAttach < ZSTD_dictForceCopy); - bounds.lowerBound = ZSTD_dictDefaultAttach; - bounds.upperBound = ZSTD_dictForceLoad; /* note : how to ensure at compile time that this is the highest value enum ? */ - return bounds; - - case ZSTD_c_literalCompressionMode: - ZSTD_STATIC_ASSERT(ZSTD_lcm_auto < ZSTD_lcm_huffman && ZSTD_lcm_huffman < ZSTD_lcm_uncompressed); - bounds.lowerBound = ZSTD_lcm_auto; - bounds.upperBound = ZSTD_lcm_uncompressed; - return bounds; - - case ZSTD_c_targetCBlockSize: - bounds.lowerBound = ZSTD_TARGETCBLOCKSIZE_MIN; - bounds.upperBound = ZSTD_TARGETCBLOCKSIZE_MAX; - return bounds; - - case ZSTD_c_srcSizeHint: - bounds.lowerBound = ZSTD_SRCSIZEHINT_MIN; - bounds.upperBound = ZSTD_SRCSIZEHINT_MAX; - return bounds; - - default: - bounds.error = ERROR(parameter_unsupported); - return bounds; - } -} - -/* ZSTD_cParam_clampBounds: - * Clamps the value into the bounded range. - */ -static size_t ZSTD_cParam_clampBounds(ZSTD_cParameter cParam, int* value) -{ - ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam); - if (ZSTD_isError(bounds.error)) return bounds.error; - if (*value < bounds.lowerBound) *value = bounds.lowerBound; - if (*value > bounds.upperBound) *value = bounds.upperBound; - return 0; -} - -#define BOUNDCHECK(cParam, val) { \ - RETURN_ERROR_IF(!ZSTD_cParam_withinBounds(cParam,val), \ - parameter_outOfBound, "Param out of bounds"); \ -} - - -static int ZSTD_isUpdateAuthorized(ZSTD_cParameter param) -{ - switch(param) - { - case ZSTD_c_compressionLevel: - case ZSTD_c_hashLog: - case ZSTD_c_chainLog: - case ZSTD_c_searchLog: - case ZSTD_c_minMatch: - case ZSTD_c_targetLength: - case ZSTD_c_strategy: - return 1; - - case ZSTD_c_format: - case ZSTD_c_windowLog: - case ZSTD_c_contentSizeFlag: - case ZSTD_c_checksumFlag: - case ZSTD_c_dictIDFlag: - case ZSTD_c_forceMaxWindow : - case ZSTD_c_nbWorkers: - case ZSTD_c_jobSize: - case ZSTD_c_overlapLog: - case ZSTD_c_rsyncable: - case ZSTD_c_enableLongDistanceMatching: - case ZSTD_c_ldmHashLog: - case ZSTD_c_ldmMinMatch: - case ZSTD_c_ldmBucketSizeLog: - case ZSTD_c_ldmHashRateLog: - case ZSTD_c_forceAttachDict: - case ZSTD_c_literalCompressionMode: - case ZSTD_c_targetCBlockSize: - case ZSTD_c_srcSizeHint: - default: - return 0; - } -} - -size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value) -{ - DEBUGLOG(4, "ZSTD_CCtx_setParameter (%i, %i)", (int)param, value); - if (cctx->streamStage != zcss_init) { - if (ZSTD_isUpdateAuthorized(param)) { - cctx->cParamsChanged = 1; - } else { - RETURN_ERROR(stage_wrong, "can only set params in ctx init stage"); - } } - - switch(param) - { - case ZSTD_c_nbWorkers: - RETURN_ERROR_IF((value!=0) && cctx->staticSize, parameter_unsupported, - "MT not compatible with static alloc"); - break; - - case ZSTD_c_compressionLevel: - case ZSTD_c_windowLog: - case ZSTD_c_hashLog: - case ZSTD_c_chainLog: - case ZSTD_c_searchLog: - case ZSTD_c_minMatch: - case ZSTD_c_targetLength: - case ZSTD_c_strategy: - case ZSTD_c_ldmHashRateLog: - case ZSTD_c_format: - case ZSTD_c_contentSizeFlag: - case ZSTD_c_checksumFlag: - case ZSTD_c_dictIDFlag: - case ZSTD_c_forceMaxWindow: - case ZSTD_c_forceAttachDict: - case ZSTD_c_literalCompressionMode: - case ZSTD_c_jobSize: - case ZSTD_c_overlapLog: - case ZSTD_c_rsyncable: - case ZSTD_c_enableLongDistanceMatching: - case ZSTD_c_ldmHashLog: - case ZSTD_c_ldmMinMatch: - case ZSTD_c_ldmBucketSizeLog: - case ZSTD_c_targetCBlockSize: - case ZSTD_c_srcSizeHint: - break; - - default: RETURN_ERROR(parameter_unsupported, "unknown parameter"); - } - return ZSTD_CCtxParams_setParameter(&cctx->requestedParams, param, value); -} - -size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams, - ZSTD_cParameter param, int value) -{ - DEBUGLOG(4, "ZSTD_CCtxParams_setParameter (%i, %i)", (int)param, value); - switch(param) - { - case ZSTD_c_format : - BOUNDCHECK(ZSTD_c_format, value); - CCtxParams->format = (ZSTD_format_e)value; - return (size_t)CCtxParams->format; - - case ZSTD_c_compressionLevel : { - FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(param, &value), ""); - if (value) { /* 0 : does not change current level */ - CCtxParams->compressionLevel = value; - } - if (CCtxParams->compressionLevel >= 0) return (size_t)CCtxParams->compressionLevel; - return 0; /* return type (size_t) cannot represent negative values */ - } - - case ZSTD_c_windowLog : - if (value!=0) /* 0 => use default */ - BOUNDCHECK(ZSTD_c_windowLog, value); - CCtxParams->cParams.windowLog = (U32)value; - return CCtxParams->cParams.windowLog; - - case ZSTD_c_hashLog : - if (value!=0) /* 0 => use default */ - BOUNDCHECK(ZSTD_c_hashLog, value); - CCtxParams->cParams.hashLog = (U32)value; - return CCtxParams->cParams.hashLog; - - case ZSTD_c_chainLog : - if (value!=0) /* 0 => use default */ - BOUNDCHECK(ZSTD_c_chainLog, value); - CCtxParams->cParams.chainLog = (U32)value; - return CCtxParams->cParams.chainLog; - - case ZSTD_c_searchLog : - if (value!=0) /* 0 => use default */ - BOUNDCHECK(ZSTD_c_searchLog, value); - CCtxParams->cParams.searchLog = (U32)value; - return (size_t)value; - - case ZSTD_c_minMatch : - if (value!=0) /* 0 => use default */ - BOUNDCHECK(ZSTD_c_minMatch, value); - CCtxParams->cParams.minMatch = value; - return CCtxParams->cParams.minMatch; - - case ZSTD_c_targetLength : - BOUNDCHECK(ZSTD_c_targetLength, value); - CCtxParams->cParams.targetLength = value; - return CCtxParams->cParams.targetLength; - - case ZSTD_c_strategy : - if (value!=0) /* 0 => use default */ - BOUNDCHECK(ZSTD_c_strategy, value); - CCtxParams->cParams.strategy = (ZSTD_strategy)value; - return (size_t)CCtxParams->cParams.strategy; - - case ZSTD_c_contentSizeFlag : - /* Content size written in frame header _when known_ (default:1) */ - DEBUGLOG(4, "set content size flag = %u", (value!=0)); - CCtxParams->fParams.contentSizeFlag = value != 0; - return CCtxParams->fParams.contentSizeFlag; - - case ZSTD_c_checksumFlag : - /* A 32-bits content checksum will be calculated and written at end of frame (default:0) */ - CCtxParams->fParams.checksumFlag = value != 0; - return CCtxParams->fParams.checksumFlag; - - case ZSTD_c_dictIDFlag : /* When applicable, dictionary's dictID is provided in frame header (default:1) */ - DEBUGLOG(4, "set dictIDFlag = %u", (value!=0)); - CCtxParams->fParams.noDictIDFlag = !value; - return !CCtxParams->fParams.noDictIDFlag; - - case ZSTD_c_forceMaxWindow : - CCtxParams->forceWindow = (value != 0); - return CCtxParams->forceWindow; - - case ZSTD_c_forceAttachDict : { - const ZSTD_dictAttachPref_e pref = (ZSTD_dictAttachPref_e)value; - BOUNDCHECK(ZSTD_c_forceAttachDict, pref); - CCtxParams->attachDictPref = pref; - return CCtxParams->attachDictPref; - } - - case ZSTD_c_literalCompressionMode : { - const ZSTD_literalCompressionMode_e lcm = (ZSTD_literalCompressionMode_e)value; - BOUNDCHECK(ZSTD_c_literalCompressionMode, lcm); - CCtxParams->literalCompressionMode = lcm; - return CCtxParams->literalCompressionMode; - } - - case ZSTD_c_nbWorkers : -#ifndef ZSTD_MULTITHREAD - RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading"); - return 0; -#else - FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(param, &value), ""); - CCtxParams->nbWorkers = value; - return CCtxParams->nbWorkers; -#endif - - case ZSTD_c_jobSize : -#ifndef ZSTD_MULTITHREAD - RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading"); - return 0; -#else - /* Adjust to the minimum non-default value. */ - if (value != 0 && value < ZSTDMT_JOBSIZE_MIN) - value = ZSTDMT_JOBSIZE_MIN; - FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(param, &value), ""); - assert(value >= 0); - CCtxParams->jobSize = value; - return CCtxParams->jobSize; -#endif - - case ZSTD_c_overlapLog : -#ifndef ZSTD_MULTITHREAD - RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading"); - return 0; -#else - FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(ZSTD_c_overlapLog, &value), ""); - CCtxParams->overlapLog = value; - return CCtxParams->overlapLog; -#endif - - case ZSTD_c_rsyncable : -#ifndef ZSTD_MULTITHREAD - RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading"); - return 0; -#else - FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(ZSTD_c_overlapLog, &value), ""); - CCtxParams->rsyncable = value; - return CCtxParams->rsyncable; -#endif - - case ZSTD_c_enableLongDistanceMatching : - CCtxParams->ldmParams.enableLdm = (value!=0); - return CCtxParams->ldmParams.enableLdm; - - case ZSTD_c_ldmHashLog : - if (value!=0) /* 0 ==> auto */ - BOUNDCHECK(ZSTD_c_ldmHashLog, value); - CCtxParams->ldmParams.hashLog = value; - return CCtxParams->ldmParams.hashLog; - - case ZSTD_c_ldmMinMatch : - if (value!=0) /* 0 ==> default */ - BOUNDCHECK(ZSTD_c_ldmMinMatch, value); - CCtxParams->ldmParams.minMatchLength = value; - return CCtxParams->ldmParams.minMatchLength; - - case ZSTD_c_ldmBucketSizeLog : - if (value!=0) /* 0 ==> default */ - BOUNDCHECK(ZSTD_c_ldmBucketSizeLog, value); - CCtxParams->ldmParams.bucketSizeLog = value; - return CCtxParams->ldmParams.bucketSizeLog; - - case ZSTD_c_ldmHashRateLog : - RETURN_ERROR_IF(value > ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN, - parameter_outOfBound, "Param out of bounds!"); - CCtxParams->ldmParams.hashRateLog = value; - return CCtxParams->ldmParams.hashRateLog; - - case ZSTD_c_targetCBlockSize : - if (value!=0) /* 0 ==> default */ - BOUNDCHECK(ZSTD_c_targetCBlockSize, value); - CCtxParams->targetCBlockSize = value; - return CCtxParams->targetCBlockSize; - - case ZSTD_c_srcSizeHint : - if (value!=0) /* 0 ==> default */ - BOUNDCHECK(ZSTD_c_srcSizeHint, value); - CCtxParams->srcSizeHint = value; - return CCtxParams->srcSizeHint; - - default: RETURN_ERROR(parameter_unsupported, "unknown parameter"); - } -} - -size_t ZSTD_CCtx_getParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int* value) -{ - return ZSTD_CCtxParams_getParameter(&cctx->requestedParams, param, value); -} - -size_t ZSTD_CCtxParams_getParameter( - ZSTD_CCtx_params* CCtxParams, ZSTD_cParameter param, int* value) -{ - switch(param) - { - case ZSTD_c_format : - *value = CCtxParams->format; - break; - case ZSTD_c_compressionLevel : - *value = CCtxParams->compressionLevel; - break; - case ZSTD_c_windowLog : - *value = (int)CCtxParams->cParams.windowLog; - break; - case ZSTD_c_hashLog : - *value = (int)CCtxParams->cParams.hashLog; - break; - case ZSTD_c_chainLog : - *value = (int)CCtxParams->cParams.chainLog; - break; - case ZSTD_c_searchLog : - *value = CCtxParams->cParams.searchLog; - break; - case ZSTD_c_minMatch : - *value = CCtxParams->cParams.minMatch; - break; - case ZSTD_c_targetLength : - *value = CCtxParams->cParams.targetLength; - break; - case ZSTD_c_strategy : - *value = (unsigned)CCtxParams->cParams.strategy; - break; - case ZSTD_c_contentSizeFlag : - *value = CCtxParams->fParams.contentSizeFlag; - break; - case ZSTD_c_checksumFlag : - *value = CCtxParams->fParams.checksumFlag; - break; - case ZSTD_c_dictIDFlag : - *value = !CCtxParams->fParams.noDictIDFlag; - break; - case ZSTD_c_forceMaxWindow : - *value = CCtxParams->forceWindow; - break; - case ZSTD_c_forceAttachDict : - *value = CCtxParams->attachDictPref; - break; - case ZSTD_c_literalCompressionMode : - *value = CCtxParams->literalCompressionMode; - break; - case ZSTD_c_nbWorkers : -#ifndef ZSTD_MULTITHREAD - assert(CCtxParams->nbWorkers == 0); -#endif - *value = CCtxParams->nbWorkers; - break; - case ZSTD_c_jobSize : -#ifndef ZSTD_MULTITHREAD - RETURN_ERROR(parameter_unsupported, "not compiled with multithreading"); -#else - assert(CCtxParams->jobSize <= INT_MAX); - *value = (int)CCtxParams->jobSize; - break; -#endif - case ZSTD_c_overlapLog : -#ifndef ZSTD_MULTITHREAD - RETURN_ERROR(parameter_unsupported, "not compiled with multithreading"); -#else - *value = CCtxParams->overlapLog; - break; -#endif - case ZSTD_c_rsyncable : -#ifndef ZSTD_MULTITHREAD - RETURN_ERROR(parameter_unsupported, "not compiled with multithreading"); -#else - *value = CCtxParams->rsyncable; - break; -#endif - case ZSTD_c_enableLongDistanceMatching : - *value = CCtxParams->ldmParams.enableLdm; - break; - case ZSTD_c_ldmHashLog : - *value = CCtxParams->ldmParams.hashLog; - break; - case ZSTD_c_ldmMinMatch : - *value = CCtxParams->ldmParams.minMatchLength; - break; - case ZSTD_c_ldmBucketSizeLog : - *value = CCtxParams->ldmParams.bucketSizeLog; - break; - case ZSTD_c_ldmHashRateLog : - *value = CCtxParams->ldmParams.hashRateLog; - break; - case ZSTD_c_targetCBlockSize : - *value = (int)CCtxParams->targetCBlockSize; - break; - case ZSTD_c_srcSizeHint : - *value = (int)CCtxParams->srcSizeHint; - break; - default: RETURN_ERROR(parameter_unsupported, "unknown parameter"); - } - return 0; -} - -/** ZSTD_CCtx_setParametersUsingCCtxParams() : - * just applies `params` into `cctx` - * no action is performed, parameters are merely stored. - * If ZSTDMT is enabled, parameters are pushed to cctx->mtctx. - * This is possible even if a compression is ongoing. - * In which case, new parameters will be applied on the fly, starting with next compression job. - */ -size_t ZSTD_CCtx_setParametersUsingCCtxParams( - ZSTD_CCtx* cctx, const ZSTD_CCtx_params* params) -{ - DEBUGLOG(4, "ZSTD_CCtx_setParametersUsingCCtxParams"); - RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong, - "The context is in the wrong stage!"); - RETURN_ERROR_IF(cctx->cdict, stage_wrong, - "Can't override parameters with cdict attached (some must " - "be inherited from the cdict)."); - - cctx->requestedParams = *params; - return 0; -} - -ZSTDLIB_API size_t ZSTD_CCtx_setPledgedSrcSize(ZSTD_CCtx* cctx, unsigned long long pledgedSrcSize) -{ - DEBUGLOG(4, "ZSTD_CCtx_setPledgedSrcSize to %u bytes", (U32)pledgedSrcSize); - RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong, - "Can't set pledgedSrcSize when not in init stage."); - cctx->pledgedSrcSizePlusOne = pledgedSrcSize+1; - return 0; -} - -/** - * Initializes the local dict using the requested parameters. - * NOTE: This does not use the pledged src size, because it may be used for more - * than one compression. - */ -static size_t ZSTD_initLocalDict(ZSTD_CCtx* cctx) -{ - ZSTD_localDict* const dl = &cctx->localDict; - ZSTD_compressionParameters const cParams = ZSTD_getCParamsFromCCtxParams( - &cctx->requestedParams, ZSTD_CONTENTSIZE_UNKNOWN, dl->dictSize); - if (dl->dict == NULL) { - /* No local dictionary. */ - assert(dl->dictBuffer == NULL); - assert(dl->cdict == NULL); - assert(dl->dictSize == 0); - return 0; - } - if (dl->cdict != NULL) { - assert(cctx->cdict == dl->cdict); - /* Local dictionary already initialized. */ - return 0; - } - assert(dl->dictSize > 0); - assert(cctx->cdict == NULL); - assert(cctx->prefixDict.dict == NULL); - - dl->cdict = ZSTD_createCDict_advanced( - dl->dict, - dl->dictSize, - ZSTD_dlm_byRef, - dl->dictContentType, - cParams, - cctx->customMem); - RETURN_ERROR_IF(!dl->cdict, memory_allocation, "ZSTD_createCDict_advanced failed"); - cctx->cdict = dl->cdict; - return 0; -} - -size_t ZSTD_CCtx_loadDictionary_advanced( - ZSTD_CCtx* cctx, const void* dict, size_t dictSize, - ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType) -{ - RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong, - "Can't load a dictionary when ctx is not in init stage."); - RETURN_ERROR_IF(cctx->staticSize, memory_allocation, - "no malloc for static CCtx"); - DEBUGLOG(4, "ZSTD_CCtx_loadDictionary_advanced (size: %u)", (U32)dictSize); - ZSTD_clearAllDicts(cctx); /* in case one already exists */ - if (dict == NULL || dictSize == 0) /* no dictionary mode */ - return 0; - if (dictLoadMethod == ZSTD_dlm_byRef) { - cctx->localDict.dict = dict; - } else { - void* dictBuffer = ZSTD_malloc(dictSize, cctx->customMem); - RETURN_ERROR_IF(!dictBuffer, memory_allocation, "NULL pointer!"); - memcpy(dictBuffer, dict, dictSize); - cctx->localDict.dictBuffer = dictBuffer; - cctx->localDict.dict = dictBuffer; - } - cctx->localDict.dictSize = dictSize; - cctx->localDict.dictContentType = dictContentType; - return 0; -} - -ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary_byReference( - ZSTD_CCtx* cctx, const void* dict, size_t dictSize) -{ - return ZSTD_CCtx_loadDictionary_advanced( - cctx, dict, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto); -} - -ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary(ZSTD_CCtx* cctx, const void* dict, size_t dictSize) -{ - return ZSTD_CCtx_loadDictionary_advanced( - cctx, dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto); -} - - -size_t ZSTD_CCtx_refCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict) -{ - RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong, - "Can't ref a dict when ctx not in init stage."); - /* Free the existing local cdict (if any) to save memory. */ - ZSTD_clearAllDicts(cctx); - cctx->cdict = cdict; - return 0; -} - -size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize) -{ - return ZSTD_CCtx_refPrefix_advanced(cctx, prefix, prefixSize, ZSTD_dct_rawContent); -} - -size_t ZSTD_CCtx_refPrefix_advanced( - ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType) -{ - RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong, - "Can't ref a prefix when ctx not in init stage."); - ZSTD_clearAllDicts(cctx); - if (prefix != NULL && prefixSize > 0) { - cctx->prefixDict.dict = prefix; - cctx->prefixDict.dictSize = prefixSize; - cctx->prefixDict.dictContentType = dictContentType; - } - return 0; -} - -/*! ZSTD_CCtx_reset() : - * Also dumps dictionary */ -size_t ZSTD_CCtx_reset(ZSTD_CCtx* cctx, ZSTD_ResetDirective reset) -{ - if ( (reset == ZSTD_reset_session_only) - || (reset == ZSTD_reset_session_and_parameters) ) { - cctx->streamStage = zcss_init; - cctx->pledgedSrcSizePlusOne = 0; - } - if ( (reset == ZSTD_reset_parameters) - || (reset == ZSTD_reset_session_and_parameters) ) { - RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong, - "Can't reset parameters only when not in init stage."); - ZSTD_clearAllDicts(cctx); - return ZSTD_CCtxParams_reset(&cctx->requestedParams); - } - return 0; -} - - -/** ZSTD_checkCParams() : - control CParam values remain within authorized range. - @return : 0, or an error code if one value is beyond authorized range */ -size_t ZSTD_checkCParams(ZSTD_compressionParameters cParams) -{ - BOUNDCHECK(ZSTD_c_windowLog, (int)cParams.windowLog); - BOUNDCHECK(ZSTD_c_chainLog, (int)cParams.chainLog); - BOUNDCHECK(ZSTD_c_hashLog, (int)cParams.hashLog); - BOUNDCHECK(ZSTD_c_searchLog, (int)cParams.searchLog); - BOUNDCHECK(ZSTD_c_minMatch, (int)cParams.minMatch); - BOUNDCHECK(ZSTD_c_targetLength,(int)cParams.targetLength); - BOUNDCHECK(ZSTD_c_strategy, cParams.strategy); - return 0; -} - -/** ZSTD_clampCParams() : - * make CParam values within valid range. - * @return : valid CParams */ -static ZSTD_compressionParameters -ZSTD_clampCParams(ZSTD_compressionParameters cParams) -{ -# define CLAMP_TYPE(cParam, val, type) { \ - ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam); \ - if ((int)valbounds.upperBound) val=(type)bounds.upperBound; \ - } -# define CLAMP(cParam, val) CLAMP_TYPE(cParam, val, unsigned) - CLAMP(ZSTD_c_windowLog, cParams.windowLog); - CLAMP(ZSTD_c_chainLog, cParams.chainLog); - CLAMP(ZSTD_c_hashLog, cParams.hashLog); - CLAMP(ZSTD_c_searchLog, cParams.searchLog); - CLAMP(ZSTD_c_minMatch, cParams.minMatch); - CLAMP(ZSTD_c_targetLength,cParams.targetLength); - CLAMP_TYPE(ZSTD_c_strategy,cParams.strategy, ZSTD_strategy); - return cParams; -} - -/** ZSTD_cycleLog() : - * condition for correct operation : hashLog > 1 */ -U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat) -{ - U32 const btScale = ((U32)strat >= (U32)ZSTD_btlazy2); - return hashLog - btScale; -} - -/** ZSTD_adjustCParams_internal() : - * optimize `cPar` for a specified input (`srcSize` and `dictSize`). - * mostly downsize to reduce memory consumption and initialization latency. - * `srcSize` can be ZSTD_CONTENTSIZE_UNKNOWN when not known. - * note : `srcSize==0` means 0! - * condition : cPar is presumed validated (can be checked using ZSTD_checkCParams()). */ -static ZSTD_compressionParameters -ZSTD_adjustCParams_internal(ZSTD_compressionParameters cPar, - unsigned long long srcSize, - size_t dictSize) -{ - static const U64 minSrcSize = 513; /* (1<<9) + 1 */ - static const U64 maxWindowResize = 1ULL << (ZSTD_WINDOWLOG_MAX-1); - assert(ZSTD_checkCParams(cPar)==0); - - if (dictSize && srcSize == ZSTD_CONTENTSIZE_UNKNOWN) - srcSize = minSrcSize; - - /* resize windowLog if input is small enough, to use less memory */ - if ( (srcSize < maxWindowResize) - && (dictSize < maxWindowResize) ) { - U32 const tSize = (U32)(srcSize + dictSize); - static U32 const hashSizeMin = 1 << ZSTD_HASHLOG_MIN; - U32 const srcLog = (tSize < hashSizeMin) ? ZSTD_HASHLOG_MIN : - ZSTD_highbit32(tSize-1) + 1; - if (cPar.windowLog > srcLog) cPar.windowLog = srcLog; - } - if (cPar.hashLog > cPar.windowLog+1) cPar.hashLog = cPar.windowLog+1; - { U32 const cycleLog = ZSTD_cycleLog(cPar.chainLog, cPar.strategy); - if (cycleLog > cPar.windowLog) - cPar.chainLog -= (cycleLog - cPar.windowLog); - } - - if (cPar.windowLog < ZSTD_WINDOWLOG_ABSOLUTEMIN) - cPar.windowLog = ZSTD_WINDOWLOG_ABSOLUTEMIN; /* minimum wlog required for valid frame header */ - - return cPar; -} - -ZSTD_compressionParameters -ZSTD_adjustCParams(ZSTD_compressionParameters cPar, - unsigned long long srcSize, - size_t dictSize) -{ - cPar = ZSTD_clampCParams(cPar); /* resulting cPar is necessarily valid (all parameters within range) */ - if (srcSize == 0) srcSize = ZSTD_CONTENTSIZE_UNKNOWN; - return ZSTD_adjustCParams_internal(cPar, srcSize, dictSize); -} - -static ZSTD_compressionParameters ZSTD_getCParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize); -static ZSTD_parameters ZSTD_getParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize); - -ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams( - const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize) -{ - ZSTD_compressionParameters cParams; - if (srcSizeHint == ZSTD_CONTENTSIZE_UNKNOWN && CCtxParams->srcSizeHint > 0) { - srcSizeHint = CCtxParams->srcSizeHint; - } - cParams = ZSTD_getCParams_internal(CCtxParams->compressionLevel, srcSizeHint, dictSize); - if (CCtxParams->ldmParams.enableLdm) cParams.windowLog = ZSTD_LDM_DEFAULT_WINDOW_LOG; - if (CCtxParams->cParams.windowLog) cParams.windowLog = CCtxParams->cParams.windowLog; - if (CCtxParams->cParams.hashLog) cParams.hashLog = CCtxParams->cParams.hashLog; - if (CCtxParams->cParams.chainLog) cParams.chainLog = CCtxParams->cParams.chainLog; - if (CCtxParams->cParams.searchLog) cParams.searchLog = CCtxParams->cParams.searchLog; - if (CCtxParams->cParams.minMatch) cParams.minMatch = CCtxParams->cParams.minMatch; - if (CCtxParams->cParams.targetLength) cParams.targetLength = CCtxParams->cParams.targetLength; - if (CCtxParams->cParams.strategy) cParams.strategy = CCtxParams->cParams.strategy; - assert(!ZSTD_checkCParams(cParams)); - /* srcSizeHint == 0 means 0 */ - return ZSTD_adjustCParams_internal(cParams, srcSizeHint, dictSize); -} - -static size_t -ZSTD_sizeof_matchState(const ZSTD_compressionParameters* const cParams, - const U32 forCCtx) -{ - size_t const chainSize = (cParams->strategy == ZSTD_fast) ? 0 : ((size_t)1 << cParams->chainLog); - size_t const hSize = ((size_t)1) << cParams->hashLog; - U32 const hashLog3 = (forCCtx && cParams->minMatch==3) ? MIN(ZSTD_HASHLOG3_MAX, cParams->windowLog) : 0; - size_t const h3Size = hashLog3 ? ((size_t)1) << hashLog3 : 0; - /* We don't use ZSTD_cwksp_alloc_size() here because the tables aren't - * surrounded by redzones in ASAN. */ - size_t const tableSpace = chainSize * sizeof(U32) - + hSize * sizeof(U32) - + h3Size * sizeof(U32); - size_t const optPotentialSpace = - ZSTD_cwksp_alloc_size((MaxML+1) * sizeof(U32)) - + ZSTD_cwksp_alloc_size((MaxLL+1) * sizeof(U32)) - + ZSTD_cwksp_alloc_size((MaxOff+1) * sizeof(U32)) - + ZSTD_cwksp_alloc_size((1<strategy >= ZSTD_btopt)) - ? optPotentialSpace - : 0; - DEBUGLOG(4, "chainSize: %u - hSize: %u - h3Size: %u", - (U32)chainSize, (U32)hSize, (U32)h3Size); - return tableSpace + optSpace; -} - -size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params) -{ - RETURN_ERROR_IF(params->nbWorkers > 0, GENERIC, "Estimate CCtx size is supported for single-threaded compression only."); - { ZSTD_compressionParameters const cParams = - ZSTD_getCParamsFromCCtxParams(params, ZSTD_CONTENTSIZE_UNKNOWN, 0); - size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, (size_t)1 << cParams.windowLog); - U32 const divider = (cParams.minMatch==3) ? 3 : 4; - size_t const maxNbSeq = blockSize / divider; - size_t const tokenSpace = ZSTD_cwksp_alloc_size(WILDCOPY_OVERLENGTH + blockSize) - + ZSTD_cwksp_alloc_size(maxNbSeq * sizeof(seqDef)) - + 3 * ZSTD_cwksp_alloc_size(maxNbSeq * sizeof(BYTE)); - size_t const entropySpace = ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE); - size_t const blockStateSpace = 2 * ZSTD_cwksp_alloc_size(sizeof(ZSTD_compressedBlockState_t)); - size_t const matchStateSize = ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 1); - - size_t const ldmSpace = ZSTD_ldm_getTableSize(params->ldmParams); - size_t const ldmSeqSpace = ZSTD_cwksp_alloc_size(ZSTD_ldm_getMaxNbSeq(params->ldmParams, blockSize) * sizeof(rawSeq)); - - /* estimateCCtxSize is for one-shot compression. So no buffers should - * be needed. However, we still allocate two 0-sized buffers, which can - * take space under ASAN. */ - size_t const bufferSpace = ZSTD_cwksp_alloc_size(0) - + ZSTD_cwksp_alloc_size(0); - - size_t const cctxSpace = ZSTD_cwksp_alloc_size(sizeof(ZSTD_CCtx)); - - size_t const neededSpace = - cctxSpace + - entropySpace + - blockStateSpace + - ldmSpace + - ldmSeqSpace + - matchStateSize + - tokenSpace + - bufferSpace; - - DEBUGLOG(5, "estimate workspace : %u", (U32)neededSpace); - return neededSpace; - } -} - -size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams) -{ - ZSTD_CCtx_params const params = ZSTD_makeCCtxParamsFromCParams(cParams); - return ZSTD_estimateCCtxSize_usingCCtxParams(¶ms); -} - -static size_t ZSTD_estimateCCtxSize_internal(int compressionLevel) -{ - ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, 0); - return ZSTD_estimateCCtxSize_usingCParams(cParams); -} - -size_t ZSTD_estimateCCtxSize(int compressionLevel) -{ - int level; - size_t memBudget = 0; - for (level=MIN(compressionLevel, 1); level<=compressionLevel; level++) { - size_t const newMB = ZSTD_estimateCCtxSize_internal(level); - if (newMB > memBudget) memBudget = newMB; - } - return memBudget; -} - -size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params* params) -{ - RETURN_ERROR_IF(params->nbWorkers > 0, GENERIC, "Estimate CCtx size is supported for single-threaded compression only."); - { ZSTD_compressionParameters const cParams = - ZSTD_getCParamsFromCCtxParams(params, ZSTD_CONTENTSIZE_UNKNOWN, 0); - size_t const CCtxSize = ZSTD_estimateCCtxSize_usingCCtxParams(params); - size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, (size_t)1 << cParams.windowLog); - size_t const inBuffSize = ((size_t)1 << cParams.windowLog) + blockSize; - size_t const outBuffSize = ZSTD_compressBound(blockSize) + 1; - size_t const streamingSize = ZSTD_cwksp_alloc_size(inBuffSize) - + ZSTD_cwksp_alloc_size(outBuffSize); - - return CCtxSize + streamingSize; - } -} - -size_t ZSTD_estimateCStreamSize_usingCParams(ZSTD_compressionParameters cParams) -{ - ZSTD_CCtx_params const params = ZSTD_makeCCtxParamsFromCParams(cParams); - return ZSTD_estimateCStreamSize_usingCCtxParams(¶ms); -} - -static size_t ZSTD_estimateCStreamSize_internal(int compressionLevel) -{ - ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, 0); - return ZSTD_estimateCStreamSize_usingCParams(cParams); -} - -size_t ZSTD_estimateCStreamSize(int compressionLevel) -{ - int level; - size_t memBudget = 0; - for (level=MIN(compressionLevel, 1); level<=compressionLevel; level++) { - size_t const newMB = ZSTD_estimateCStreamSize_internal(level); - if (newMB > memBudget) memBudget = newMB; - } - return memBudget; -} - -/* ZSTD_getFrameProgression(): - * tells how much data has been consumed (input) and produced (output) for current frame. - * able to count progression inside worker threads (non-blocking mode). - */ -ZSTD_frameProgression ZSTD_getFrameProgression(const ZSTD_CCtx* cctx) -{ -#ifdef ZSTD_MULTITHREAD - if (cctx->appliedParams.nbWorkers > 0) { - return ZSTDMT_getFrameProgression(cctx->mtctx); - } -#endif - { ZSTD_frameProgression fp; - size_t const buffered = (cctx->inBuff == NULL) ? 0 : - cctx->inBuffPos - cctx->inToCompress; - if (buffered) assert(cctx->inBuffPos >= cctx->inToCompress); - assert(buffered <= ZSTD_BLOCKSIZE_MAX); - fp.ingested = cctx->consumedSrcSize + buffered; - fp.consumed = cctx->consumedSrcSize; - fp.produced = cctx->producedCSize; - fp.flushed = cctx->producedCSize; /* simplified; some data might still be left within streaming output buffer */ - fp.currentJobID = 0; - fp.nbActiveWorkers = 0; - return fp; -} } - -/*! ZSTD_toFlushNow() - * Only useful for multithreading scenarios currently (nbWorkers >= 1). - */ -size_t ZSTD_toFlushNow(ZSTD_CCtx* cctx) -{ -#ifdef ZSTD_MULTITHREAD - if (cctx->appliedParams.nbWorkers > 0) { - return ZSTDMT_toFlushNow(cctx->mtctx); - } -#endif - (void)cctx; - return 0; /* over-simplification; could also check if context is currently running in streaming mode, and in which case, report how many bytes are left to be flushed within output buffer */ -} - -static void ZSTD_assertEqualCParams(ZSTD_compressionParameters cParams1, - ZSTD_compressionParameters cParams2) -{ - (void)cParams1; - (void)cParams2; - assert(cParams1.windowLog == cParams2.windowLog); - assert(cParams1.chainLog == cParams2.chainLog); - assert(cParams1.hashLog == cParams2.hashLog); - assert(cParams1.searchLog == cParams2.searchLog); - assert(cParams1.minMatch == cParams2.minMatch); - assert(cParams1.targetLength == cParams2.targetLength); - assert(cParams1.strategy == cParams2.strategy); -} - -void ZSTD_reset_compressedBlockState(ZSTD_compressedBlockState_t* bs) -{ - int i; - for (i = 0; i < ZSTD_REP_NUM; ++i) - bs->rep[i] = ZSTDInternalConstants::repStartValue[i]; - bs->entropy.huf.repeatMode = HUF_repeat_none; - bs->entropy.fse.offcode_repeatMode = FSE_repeat_none; - bs->entropy.fse.matchlength_repeatMode = FSE_repeat_none; - bs->entropy.fse.litlength_repeatMode = FSE_repeat_none; -} - -/*! ZSTD_invalidateMatchState() - * Invalidate all the matches in the match finder tables. - * Requires nextSrc and base to be set (can be NULL). - */ -static void ZSTD_invalidateMatchState(ZSTD_matchState_t* ms) -{ - ZSTD_window_clear(&ms->window); - - ms->nextToUpdate = ms->window.dictLimit; - ms->loadedDictEnd = 0; - ms->opt.litLengthSum = 0; /* force reset of btopt stats */ - ms->dictMatchState = NULL; -} - -/** - * Indicates whether this compression proceeds directly from user-provided - * source buffer to user-provided destination buffer (ZSTDb_not_buffered), or - * whether the context needs to buffer the input/output (ZSTDb_buffered). - */ -typedef enum { - ZSTDb_not_buffered, - ZSTDb_buffered -} ZSTD_buffered_policy_e; - -/** - * Controls, for this matchState reset, whether the tables need to be cleared / - * prepared for the coming compression (ZSTDcrp_makeClean), or whether the - * tables can be left unclean (ZSTDcrp_leaveDirty), because we know that a - * subsequent operation will overwrite the table space anyways (e.g., copying - * the matchState contents in from a CDict). - */ -typedef enum { - ZSTDcrp_makeClean, - ZSTDcrp_leaveDirty -} ZSTD_compResetPolicy_e; - -/** - * Controls, for this matchState reset, whether indexing can continue where it - * left off (ZSTDirp_continue), or whether it needs to be restarted from zero - * (ZSTDirp_reset). - */ -typedef enum { - ZSTDirp_continue, - ZSTDirp_reset -} ZSTD_indexResetPolicy_e; - -typedef enum { - ZSTD_resetTarget_CDict, - ZSTD_resetTarget_CCtx -} ZSTD_resetTarget_e; - -static size_t -ZSTD_reset_matchState(ZSTD_matchState_t* ms, - ZSTD_cwksp* ws, - const ZSTD_compressionParameters* cParams, - const ZSTD_compResetPolicy_e crp, - const ZSTD_indexResetPolicy_e forceResetIndex, - const ZSTD_resetTarget_e forWho) -{ - size_t const chainSize = (cParams->strategy == ZSTD_fast) ? 0 : ((size_t)1 << cParams->chainLog); - size_t const hSize = ((size_t)1) << cParams->hashLog; - U32 const hashLog3 = ((forWho == ZSTD_resetTarget_CCtx) && cParams->minMatch==3) ? MIN(ZSTD_HASHLOG3_MAX, cParams->windowLog) : 0; - size_t const h3Size = hashLog3 ? ((size_t)1) << hashLog3 : 0; - - DEBUGLOG(4, "reset indices : %u", forceResetIndex == ZSTDirp_reset); - if (forceResetIndex == ZSTDirp_reset) { - ZSTD_window_init(&ms->window); - ZSTD_cwksp_mark_tables_dirty(ws); - } - - ms->hashLog3 = hashLog3; - - ZSTD_invalidateMatchState(ms); - - assert(!ZSTD_cwksp_reserve_failed(ws)); /* check that allocation hasn't already failed */ - - ZSTD_cwksp_clear_tables(ws); - - DEBUGLOG(5, "reserving table space"); - /* table Space */ - ms->hashTable = (U32*)ZSTD_cwksp_reserve_table(ws, hSize * sizeof(U32)); - ms->chainTable = (U32*)ZSTD_cwksp_reserve_table(ws, chainSize * sizeof(U32)); - ms->hashTable3 = (U32*)ZSTD_cwksp_reserve_table(ws, h3Size * sizeof(U32)); - RETURN_ERROR_IF(ZSTD_cwksp_reserve_failed(ws), memory_allocation, - "failed a workspace allocation in ZSTD_reset_matchState"); - - DEBUGLOG(4, "reset table : %u", crp!=ZSTDcrp_leaveDirty); - if (crp!=ZSTDcrp_leaveDirty) { - /* reset tables only */ - ZSTD_cwksp_clean_tables(ws); - } - - /* opt parser space */ - if ((forWho == ZSTD_resetTarget_CCtx) && (cParams->strategy >= ZSTD_btopt)) { - DEBUGLOG(4, "reserving optimal parser space"); - ms->opt.litFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (1<opt.litLengthFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxLL+1) * sizeof(unsigned)); - ms->opt.matchLengthFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxML+1) * sizeof(unsigned)); - ms->opt.offCodeFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxOff+1) * sizeof(unsigned)); - ms->opt.matchTable = (ZSTD_match_t*)ZSTD_cwksp_reserve_aligned(ws, (ZSTD_OPT_NUM+1) * sizeof(ZSTD_match_t)); - ms->opt.priceTable = (ZSTD_optimal_t*)ZSTD_cwksp_reserve_aligned(ws, (ZSTD_OPT_NUM+1) * sizeof(ZSTD_optimal_t)); - } - - ms->cParams = *cParams; - - RETURN_ERROR_IF(ZSTD_cwksp_reserve_failed(ws), memory_allocation, - "failed a workspace allocation in ZSTD_reset_matchState"); - - return 0; -} - -/* ZSTD_indexTooCloseToMax() : - * minor optimization : prefer memset() rather than reduceIndex() - * which is measurably slow in some circumstances (reported for Visual Studio). - * Works when re-using a context for a lot of smallish inputs : - * if all inputs are smaller than ZSTD_INDEXOVERFLOW_MARGIN, - * memset() will be triggered before reduceIndex(). - */ -#define ZSTD_INDEXOVERFLOW_MARGIN (16 MB) -static int ZSTD_indexTooCloseToMax(ZSTD_window_t w) -{ - return (size_t)(w.nextSrc - w.base) > (ZSTD_CURRENT_MAX - ZSTD_INDEXOVERFLOW_MARGIN); -} - -/*! ZSTD_resetCCtx_internal() : - note : `params` are assumed fully validated at this stage */ -static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, - ZSTD_CCtx_params params, - U64 const pledgedSrcSize, - ZSTD_compResetPolicy_e const crp, - ZSTD_buffered_policy_e const zbuff) -{ - ZSTD_cwksp* const ws = &zc->workspace; - DEBUGLOG(4, "ZSTD_resetCCtx_internal: pledgedSrcSize=%u, wlog=%u", - (U32)pledgedSrcSize, params.cParams.windowLog); - assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams))); - - zc->isFirstBlock = 1; - - if (params.ldmParams.enableLdm) { - /* Adjust long distance matching parameters */ - ZSTD_ldm_adjustParameters(¶ms.ldmParams, ¶ms.cParams); - assert(params.ldmParams.hashLog >= params.ldmParams.bucketSizeLog); - assert(params.ldmParams.hashRateLog < 32); - zc->ldmState.hashPower = ZSTD_rollingHash_primePower(params.ldmParams.minMatchLength); - } - - { size_t const windowSize = MAX(1, (size_t)MIN(((U64)1 << params.cParams.windowLog), pledgedSrcSize)); - size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, windowSize); - U32 const divider = (params.cParams.minMatch==3) ? 3 : 4; - size_t const maxNbSeq = blockSize / divider; - size_t const tokenSpace = ZSTD_cwksp_alloc_size(WILDCOPY_OVERLENGTH + blockSize) - + ZSTD_cwksp_alloc_size(maxNbSeq * sizeof(seqDef)) - + 3 * ZSTD_cwksp_alloc_size(maxNbSeq * sizeof(BYTE)); - size_t const buffOutSize = (zbuff==ZSTDb_buffered) ? ZSTD_compressBound(blockSize)+1 : 0; - size_t const buffInSize = (zbuff==ZSTDb_buffered) ? windowSize + blockSize : 0; - size_t const matchStateSize = ZSTD_sizeof_matchState(¶ms.cParams, /* forCCtx */ 1); - size_t const maxNbLdmSeq = ZSTD_ldm_getMaxNbSeq(params.ldmParams, blockSize); - - ZSTD_indexResetPolicy_e needsIndexReset = zc->initialized ? ZSTDirp_continue : ZSTDirp_reset; - - if (ZSTD_indexTooCloseToMax(zc->blockState.matchState.window)) { - needsIndexReset = ZSTDirp_reset; - } - - if (!zc->staticSize) ZSTD_cwksp_bump_oversized_duration(ws, 0); - - /* Check if workspace is large enough, alloc a new one if needed */ - { size_t const cctxSpace = zc->staticSize ? ZSTD_cwksp_alloc_size(sizeof(ZSTD_CCtx)) : 0; - size_t const entropySpace = ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE); - size_t const blockStateSpace = 2 * ZSTD_cwksp_alloc_size(sizeof(ZSTD_compressedBlockState_t)); - size_t const bufferSpace = ZSTD_cwksp_alloc_size(buffInSize) + ZSTD_cwksp_alloc_size(buffOutSize); - size_t const ldmSpace = ZSTD_ldm_getTableSize(params.ldmParams); - size_t const ldmSeqSpace = ZSTD_cwksp_alloc_size(maxNbLdmSeq * sizeof(rawSeq)); - - size_t const neededSpace = - cctxSpace + - entropySpace + - blockStateSpace + - ldmSpace + - ldmSeqSpace + - matchStateSize + - tokenSpace + - bufferSpace; - - int const workspaceTooSmall = ZSTD_cwksp_sizeof(ws) < neededSpace; - int const workspaceWasteful = ZSTD_cwksp_check_wasteful(ws, neededSpace); - - DEBUGLOG(4, "Need %zuKB workspace, including %zuKB for match state, and %zuKB for buffers", - neededSpace>>10, matchStateSize>>10, bufferSpace>>10); - DEBUGLOG(4, "windowSize: %zu - blockSize: %zu", windowSize, blockSize); - - if (workspaceTooSmall || workspaceWasteful) { - DEBUGLOG(4, "Resize workspaceSize from %zuKB to %zuKB", - ZSTD_cwksp_sizeof(ws) >> 10, - neededSpace >> 10); - - RETURN_ERROR_IF(zc->staticSize, memory_allocation, "static cctx : no resize"); - - needsIndexReset = ZSTDirp_reset; - - ZSTD_cwksp_free(ws, zc->customMem); - FORWARD_IF_ERROR(ZSTD_cwksp_create(ws, neededSpace, zc->customMem), ""); - - DEBUGLOG(5, "reserving object space"); - /* Statically sized space. - * entropyWorkspace never moves, - * though prev/next block swap places */ - assert(ZSTD_cwksp_check_available(ws, 2 * sizeof(ZSTD_compressedBlockState_t))); - zc->blockState.prevCBlock = (ZSTD_compressedBlockState_t*) ZSTD_cwksp_reserve_object(ws, sizeof(ZSTD_compressedBlockState_t)); - RETURN_ERROR_IF(zc->blockState.prevCBlock == NULL, memory_allocation, "couldn't allocate prevCBlock"); - zc->blockState.nextCBlock = (ZSTD_compressedBlockState_t*) ZSTD_cwksp_reserve_object(ws, sizeof(ZSTD_compressedBlockState_t)); - RETURN_ERROR_IF(zc->blockState.nextCBlock == NULL, memory_allocation, "couldn't allocate nextCBlock"); - zc->entropyWorkspace = (U32*) ZSTD_cwksp_reserve_object(ws, HUF_WORKSPACE_SIZE); - RETURN_ERROR_IF(zc->blockState.nextCBlock == NULL, memory_allocation, "couldn't allocate entropyWorkspace"); - } } - - ZSTD_cwksp_clear(ws); - - /* init params */ - zc->appliedParams = params; - zc->blockState.matchState.cParams = params.cParams; - zc->pledgedSrcSizePlusOne = pledgedSrcSize+1; - zc->consumedSrcSize = 0; - zc->producedCSize = 0; - if (pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN) - zc->appliedParams.fParams.contentSizeFlag = 0; - DEBUGLOG(4, "pledged content size : %u ; flag : %u", - (unsigned)pledgedSrcSize, zc->appliedParams.fParams.contentSizeFlag); - zc->blockSize = blockSize; - - XXH64_reset(&zc->xxhState, 0); - zc->stage = ZSTDcs_init; - zc->dictID = 0; - - ZSTD_reset_compressedBlockState(zc->blockState.prevCBlock); - - /* ZSTD_wildcopy() is used to copy into the literals buffer, - * so we have to oversize the buffer by WILDCOPY_OVERLENGTH bytes. - */ - zc->seqStore.litStart = ZSTD_cwksp_reserve_buffer(ws, blockSize + WILDCOPY_OVERLENGTH); - zc->seqStore.maxNbLit = blockSize; - - /* buffers */ - zc->inBuffSize = buffInSize; - zc->inBuff = (char*)ZSTD_cwksp_reserve_buffer(ws, buffInSize); - zc->outBuffSize = buffOutSize; - zc->outBuff = (char*)ZSTD_cwksp_reserve_buffer(ws, buffOutSize); - - /* ldm bucketOffsets table */ - if (params.ldmParams.enableLdm) { - /* TODO: avoid memset? */ - size_t const ldmBucketSize = - ((size_t)1) << (params.ldmParams.hashLog - - params.ldmParams.bucketSizeLog); - zc->ldmState.bucketOffsets = ZSTD_cwksp_reserve_buffer(ws, ldmBucketSize); - memset(zc->ldmState.bucketOffsets, 0, ldmBucketSize); - } - - /* sequences storage */ - ZSTD_referenceExternalSequences(zc, NULL, 0); - zc->seqStore.maxNbSeq = maxNbSeq; - zc->seqStore.llCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE)); - zc->seqStore.mlCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE)); - zc->seqStore.ofCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE)); - zc->seqStore.sequencesStart = (seqDef*)ZSTD_cwksp_reserve_aligned(ws, maxNbSeq * sizeof(seqDef)); - - FORWARD_IF_ERROR(ZSTD_reset_matchState( - &zc->blockState.matchState, - ws, - ¶ms.cParams, - crp, - needsIndexReset, - ZSTD_resetTarget_CCtx), ""); - - /* ldm hash table */ - if (params.ldmParams.enableLdm) { - /* TODO: avoid memset? */ - size_t const ldmHSize = ((size_t)1) << params.ldmParams.hashLog; - zc->ldmState.hashTable = (ldmEntry_t*)ZSTD_cwksp_reserve_aligned(ws, ldmHSize * sizeof(ldmEntry_t)); - memset(zc->ldmState.hashTable, 0, ldmHSize * sizeof(ldmEntry_t)); - zc->ldmSequences = (rawSeq*)ZSTD_cwksp_reserve_aligned(ws, maxNbLdmSeq * sizeof(rawSeq)); - zc->maxNbLdmSequences = maxNbLdmSeq; - - ZSTD_window_init(&zc->ldmState.window); - ZSTD_window_clear(&zc->ldmState.window); - zc->ldmState.loadedDictEnd = 0; - } - - DEBUGLOG(3, "wksp: finished allocating, %zd bytes remain available", ZSTD_cwksp_available_space(ws)); - zc->initialized = 1; - - return 0; - } -} - -/* ZSTD_invalidateRepCodes() : - * ensures next compression will not use repcodes from previous block. - * Note : only works with regular variant; - * do not use with extDict variant ! */ -void ZSTD_invalidateRepCodes(ZSTD_CCtx* cctx) { - int i; - for (i=0; iblockState.prevCBlock->rep[i] = 0; - assert(!ZSTD_window_hasExtDict(cctx->blockState.matchState.window)); -} - -/* These are the approximate sizes for each strategy past which copying the - * dictionary tables into the working context is faster than using them - * in-place. - */ -static const size_t attachDictSizeCutoffs[ZSTD_STRATEGY_MAX+1] = { - 8 KB, /* unused */ - 8 KB, /* ZSTD_fast */ - 16 KB, /* ZSTD_dfast */ - 32 KB, /* ZSTD_greedy */ - 32 KB, /* ZSTD_lazy */ - 32 KB, /* ZSTD_lazy2 */ - 32 KB, /* ZSTD_btlazy2 */ - 32 KB, /* ZSTD_btopt */ - 8 KB, /* ZSTD_btultra */ - 8 KB /* ZSTD_btultra2 */ -}; - -static int ZSTD_shouldAttachDict(const ZSTD_CDict* cdict, - const ZSTD_CCtx_params* params, - U64 pledgedSrcSize) -{ - size_t cutoff = attachDictSizeCutoffs[cdict->matchState.cParams.strategy]; - return ( pledgedSrcSize <= cutoff - || pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN - || params->attachDictPref == ZSTD_dictForceAttach ) - && params->attachDictPref != ZSTD_dictForceCopy - && !params->forceWindow; /* dictMatchState isn't correctly - * handled in _enforceMaxDist */ -} - -static size_t -ZSTD_resetCCtx_byAttachingCDict(ZSTD_CCtx* cctx, - const ZSTD_CDict* cdict, - ZSTD_CCtx_params params, - U64 pledgedSrcSize, - ZSTD_buffered_policy_e zbuff) -{ - { const ZSTD_compressionParameters* const cdict_cParams = &cdict->matchState.cParams; - unsigned const windowLog = params.cParams.windowLog; - assert(windowLog != 0); - /* Resize working context table params for input only, since the dict - * has its own tables. */ - /* pledgeSrcSize == 0 means 0! */ - params.cParams = ZSTD_adjustCParams_internal(*cdict_cParams, pledgedSrcSize, 0); - params.cParams.windowLog = windowLog; - FORWARD_IF_ERROR(ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize, - ZSTDcrp_makeClean, zbuff), ""); - assert(cctx->appliedParams.cParams.strategy == cdict_cParams->strategy); - } - - { const U32 cdictEnd = (U32)( cdict->matchState.window.nextSrc - - cdict->matchState.window.base); - const U32 cdictLen = cdictEnd - cdict->matchState.window.dictLimit; - if (cdictLen == 0) { - /* don't even attach dictionaries with no contents */ - DEBUGLOG(4, "skipping attaching empty dictionary"); - } else { - DEBUGLOG(4, "attaching dictionary into context"); - cctx->blockState.matchState.dictMatchState = &cdict->matchState; - - /* prep working match state so dict matches never have negative indices - * when they are translated to the working context's index space. */ - if (cctx->blockState.matchState.window.dictLimit < cdictEnd) { - cctx->blockState.matchState.window.nextSrc = - cctx->blockState.matchState.window.base + cdictEnd; - ZSTD_window_clear(&cctx->blockState.matchState.window); - } - /* loadedDictEnd is expressed within the referential of the active context */ - cctx->blockState.matchState.loadedDictEnd = cctx->blockState.matchState.window.dictLimit; - } } - - cctx->dictID = cdict->dictID; - - /* copy block state */ - memcpy(cctx->blockState.prevCBlock, &cdict->cBlockState, sizeof(cdict->cBlockState)); - - return 0; -} - -static size_t ZSTD_resetCCtx_byCopyingCDict(ZSTD_CCtx* cctx, - const ZSTD_CDict* cdict, - ZSTD_CCtx_params params, - U64 pledgedSrcSize, - ZSTD_buffered_policy_e zbuff) -{ - const ZSTD_compressionParameters *cdict_cParams = &cdict->matchState.cParams; - - DEBUGLOG(4, "copying dictionary into context"); - - { unsigned const windowLog = params.cParams.windowLog; - assert(windowLog != 0); - /* Copy only compression parameters related to tables. */ - params.cParams = *cdict_cParams; - params.cParams.windowLog = windowLog; - FORWARD_IF_ERROR(ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize, - ZSTDcrp_leaveDirty, zbuff), ""); - assert(cctx->appliedParams.cParams.strategy == cdict_cParams->strategy); - assert(cctx->appliedParams.cParams.hashLog == cdict_cParams->hashLog); - assert(cctx->appliedParams.cParams.chainLog == cdict_cParams->chainLog); - } - - ZSTD_cwksp_mark_tables_dirty(&cctx->workspace); - - /* copy tables */ - { size_t const chainSize = (cdict_cParams->strategy == ZSTD_fast) ? 0 : ((size_t)1 << cdict_cParams->chainLog); - size_t const hSize = (size_t)1 << cdict_cParams->hashLog; - - memcpy(cctx->blockState.matchState.hashTable, - cdict->matchState.hashTable, - hSize * sizeof(U32)); - memcpy(cctx->blockState.matchState.chainTable, - cdict->matchState.chainTable, - chainSize * sizeof(U32)); - } - - /* Zero the hashTable3, since the cdict never fills it */ - { int const h3log = cctx->blockState.matchState.hashLog3; - size_t const h3Size = h3log ? ((size_t)1 << h3log) : 0; - assert(cdict->matchState.hashLog3 == 0); - memset(cctx->blockState.matchState.hashTable3, 0, h3Size * sizeof(U32)); - } - - ZSTD_cwksp_mark_tables_clean(&cctx->workspace); - - /* copy dictionary offsets */ - { ZSTD_matchState_t const* srcMatchState = &cdict->matchState; - ZSTD_matchState_t* dstMatchState = &cctx->blockState.matchState; - dstMatchState->window = srcMatchState->window; - dstMatchState->nextToUpdate = srcMatchState->nextToUpdate; - dstMatchState->loadedDictEnd= srcMatchState->loadedDictEnd; - } - - cctx->dictID = cdict->dictID; - - /* copy block state */ - memcpy(cctx->blockState.prevCBlock, &cdict->cBlockState, sizeof(cdict->cBlockState)); - - return 0; -} - -/* We have a choice between copying the dictionary context into the working - * context, or referencing the dictionary context from the working context - * in-place. We decide here which strategy to use. */ -static size_t ZSTD_resetCCtx_usingCDict(ZSTD_CCtx* cctx, - const ZSTD_CDict* cdict, - const ZSTD_CCtx_params* params, - U64 pledgedSrcSize, - ZSTD_buffered_policy_e zbuff) -{ - - DEBUGLOG(4, "ZSTD_resetCCtx_usingCDict (pledgedSrcSize=%u)", - (unsigned)pledgedSrcSize); - - if (ZSTD_shouldAttachDict(cdict, params, pledgedSrcSize)) { - return ZSTD_resetCCtx_byAttachingCDict( - cctx, cdict, *params, pledgedSrcSize, zbuff); - } else { - return ZSTD_resetCCtx_byCopyingCDict( - cctx, cdict, *params, pledgedSrcSize, zbuff); - } -} - -/*! ZSTD_copyCCtx_internal() : - * Duplicate an existing context `srcCCtx` into another one `dstCCtx`. - * Only works during stage ZSTDcs_init (i.e. after creation, but before first call to ZSTD_compressContinue()). - * The "context", in this case, refers to the hash and chain tables, - * entropy tables, and dictionary references. - * `windowLog` value is enforced if != 0, otherwise value is copied from srcCCtx. - * @return : 0, or an error code */ -static size_t ZSTD_copyCCtx_internal(ZSTD_CCtx* dstCCtx, - const ZSTD_CCtx* srcCCtx, - ZSTD_frameParameters fParams, - U64 pledgedSrcSize, - ZSTD_buffered_policy_e zbuff) -{ - DEBUGLOG(5, "ZSTD_copyCCtx_internal"); - RETURN_ERROR_IF(srcCCtx->stage!=ZSTDcs_init, stage_wrong, - "Can't copy a ctx that's not in init stage."); - - memcpy(&dstCCtx->customMem, &srcCCtx->customMem, sizeof(ZSTD_customMem)); - { ZSTD_CCtx_params params = dstCCtx->requestedParams; - /* Copy only compression parameters related to tables. */ - params.cParams = srcCCtx->appliedParams.cParams; - params.fParams = fParams; - ZSTD_resetCCtx_internal(dstCCtx, params, pledgedSrcSize, - ZSTDcrp_leaveDirty, zbuff); - assert(dstCCtx->appliedParams.cParams.windowLog == srcCCtx->appliedParams.cParams.windowLog); - assert(dstCCtx->appliedParams.cParams.strategy == srcCCtx->appliedParams.cParams.strategy); - assert(dstCCtx->appliedParams.cParams.hashLog == srcCCtx->appliedParams.cParams.hashLog); - assert(dstCCtx->appliedParams.cParams.chainLog == srcCCtx->appliedParams.cParams.chainLog); - assert(dstCCtx->blockState.matchState.hashLog3 == srcCCtx->blockState.matchState.hashLog3); - } - - ZSTD_cwksp_mark_tables_dirty(&dstCCtx->workspace); - - /* copy tables */ - { size_t const chainSize = (srcCCtx->appliedParams.cParams.strategy == ZSTD_fast) ? 0 : ((size_t)1 << srcCCtx->appliedParams.cParams.chainLog); - size_t const hSize = (size_t)1 << srcCCtx->appliedParams.cParams.hashLog; - int const h3log = srcCCtx->blockState.matchState.hashLog3; - size_t const h3Size = h3log ? ((size_t)1 << h3log) : 0; - - memcpy(dstCCtx->blockState.matchState.hashTable, - srcCCtx->blockState.matchState.hashTable, - hSize * sizeof(U32)); - memcpy(dstCCtx->blockState.matchState.chainTable, - srcCCtx->blockState.matchState.chainTable, - chainSize * sizeof(U32)); - memcpy(dstCCtx->blockState.matchState.hashTable3, - srcCCtx->blockState.matchState.hashTable3, - h3Size * sizeof(U32)); - } - - ZSTD_cwksp_mark_tables_clean(&dstCCtx->workspace); - - /* copy dictionary offsets */ - { - const ZSTD_matchState_t* srcMatchState = &srcCCtx->blockState.matchState; - ZSTD_matchState_t* dstMatchState = &dstCCtx->blockState.matchState; - dstMatchState->window = srcMatchState->window; - dstMatchState->nextToUpdate = srcMatchState->nextToUpdate; - dstMatchState->loadedDictEnd= srcMatchState->loadedDictEnd; - } - dstCCtx->dictID = srcCCtx->dictID; - - /* copy block state */ - memcpy(dstCCtx->blockState.prevCBlock, srcCCtx->blockState.prevCBlock, sizeof(*srcCCtx->blockState.prevCBlock)); - - return 0; -} - -/*! ZSTD_copyCCtx() : - * Duplicate an existing context `srcCCtx` into another one `dstCCtx`. - * Only works during stage ZSTDcs_init (i.e. after creation, but before first call to ZSTD_compressContinue()). - * pledgedSrcSize==0 means "unknown". -* @return : 0, or an error code */ -size_t ZSTD_copyCCtx(ZSTD_CCtx* dstCCtx, const ZSTD_CCtx* srcCCtx, unsigned long long pledgedSrcSize) -{ - ZSTD_frameParameters fParams = { 1 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ }; - ZSTD_buffered_policy_e const zbuff = (ZSTD_buffered_policy_e)(srcCCtx->inBuffSize>0); - ZSTD_STATIC_ASSERT((U32)ZSTDb_buffered==1); - if (pledgedSrcSize==0) pledgedSrcSize = ZSTD_CONTENTSIZE_UNKNOWN; - fParams.contentSizeFlag = (pledgedSrcSize != ZSTD_CONTENTSIZE_UNKNOWN); - - return ZSTD_copyCCtx_internal(dstCCtx, srcCCtx, - fParams, pledgedSrcSize, - zbuff); -} - - -#define ZSTD_ROWSIZE 16 -/*! ZSTD_reduceTable() : - * reduce table indexes by `reducerValue`, or squash to zero. - * PreserveMark preserves "unsorted mark" for btlazy2 strategy. - * It must be set to a clear 0/1 value, to remove branch during inlining. - * Presume table size is a multiple of ZSTD_ROWSIZE - * to help auto-vectorization */ -FORCE_INLINE_TEMPLATE void -ZSTD_reduceTable_internal (U32* const table, U32 const size, U32 const reducerValue, int const preserveMark) -{ - int const nbRows = (int)size / ZSTD_ROWSIZE; - int cellNb = 0; - int rowNb; - assert((size & (ZSTD_ROWSIZE-1)) == 0); /* multiple of ZSTD_ROWSIZE */ - assert(size < (1U<<31)); /* can be casted to int */ - -#if defined (MEMORY_SANITIZER) && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE) - /* To validate that the table re-use logic is sound, and that we don't - * access table space that we haven't cleaned, we re-"poison" the table - * space every time we mark it dirty. - * - * This function however is intended to operate on those dirty tables and - * re-clean them. So when this function is used correctly, we can unpoison - * the memory it operated on. This introduces a blind spot though, since - * if we now try to operate on __actually__ poisoned memory, we will not - * detect that. */ - __msan_unpoison(table, size * sizeof(U32)); -#endif - - for (rowNb=0 ; rowNb < nbRows ; rowNb++) { - int column; - for (column=0; columncParams.hashLog; - ZSTD_reduceTable(ms->hashTable, hSize, reducerValue); - } - - if (params->cParams.strategy != ZSTD_fast) { - U32 const chainSize = (U32)1 << params->cParams.chainLog; - if (params->cParams.strategy == ZSTD_btlazy2) - ZSTD_reduceTable_btlazy2(ms->chainTable, chainSize, reducerValue); - else - ZSTD_reduceTable(ms->chainTable, chainSize, reducerValue); - } - - if (ms->hashLog3) { - U32 const h3Size = (U32)1 << ms->hashLog3; - ZSTD_reduceTable(ms->hashTable3, h3Size, reducerValue); - } -} - - -/*-******************************************************* -* Block entropic compression -*********************************************************/ - -/* See doc/zstd_compression_format.md for detailed format description */ - -void ZSTD_seqToCodes(const seqStore_t* seqStorePtr) -{ - const seqDef* const sequences = seqStorePtr->sequencesStart; - BYTE* const llCodeTable = seqStorePtr->llCode; - BYTE* const ofCodeTable = seqStorePtr->ofCode; - BYTE* const mlCodeTable = seqStorePtr->mlCode; - U32 const nbSeq = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart); - U32 u; - assert(nbSeq <= seqStorePtr->maxNbSeq); - for (u=0; ulongLengthID==1) - llCodeTable[seqStorePtr->longLengthPos] = MaxLL; - if (seqStorePtr->longLengthID==2) - mlCodeTable[seqStorePtr->longLengthPos] = MaxML; -} - -/* ZSTD_useTargetCBlockSize(): - * Returns if target compressed block size param is being used. - * If used, compression will do best effort to make a compressed block size to be around targetCBlockSize. - * Returns 1 if true, 0 otherwise. */ -static int ZSTD_useTargetCBlockSize(const ZSTD_CCtx_params* cctxParams) -{ - DEBUGLOG(5, "ZSTD_useTargetCBlockSize (targetCBlockSize=%zu)", cctxParams->targetCBlockSize); - return (cctxParams->targetCBlockSize != 0); -} - -/* ZSTD_compressSequences_internal(): - * actually compresses both literals and sequences */ -MEM_STATIC size_t -ZSTD_compressSequences_internal(seqStore_t* seqStorePtr, - const ZSTD_entropyCTables_t* prevEntropy, - ZSTD_entropyCTables_t* nextEntropy, - const ZSTD_CCtx_params* cctxParams, - void* dst, size_t dstCapacity, - void* entropyWorkspace, size_t entropyWkspSize, - const int bmi2) -{ - const int longOffsets = cctxParams->cParams.windowLog > STREAM_ACCUMULATOR_MIN; - ZSTD_strategy const strategy = cctxParams->cParams.strategy; - unsigned count[MaxSeq+1]; - FSE_CTable* CTable_LitLength = nextEntropy->fse.litlengthCTable; - FSE_CTable* CTable_OffsetBits = nextEntropy->fse.offcodeCTable; - FSE_CTable* CTable_MatchLength = nextEntropy->fse.matchlengthCTable; - U32 LLtype, Offtype, MLtype; /* compressed, raw or rle */ - const seqDef* const sequences = seqStorePtr->sequencesStart; - const BYTE* const ofCodeTable = seqStorePtr->ofCode; - const BYTE* const llCodeTable = seqStorePtr->llCode; - const BYTE* const mlCodeTable = seqStorePtr->mlCode; - BYTE* const ostart = (BYTE*)dst; - BYTE* const oend = ostart + dstCapacity; - BYTE* op = ostart; - size_t const nbSeq = (size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart); - BYTE* seqHead; - BYTE* lastNCount = NULL; - - DEBUGLOG(5, "ZSTD_compressSequences_internal (nbSeq=%zu)", nbSeq); - ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<litStart; - size_t const litSize = (size_t)(seqStorePtr->lit - literals); - size_t const cSize = ZSTD_compressLiterals( - &prevEntropy->huf, &nextEntropy->huf, - cctxParams->cParams.strategy, - ZSTD_disableLiteralsCompression(cctxParams), - op, dstCapacity, - literals, litSize, - entropyWorkspace, entropyWkspSize, - bmi2); - FORWARD_IF_ERROR(cSize, "ZSTD_compressLiterals failed"); - assert(cSize <= dstCapacity); - op += cSize; - } - - /* Sequences Header */ - RETURN_ERROR_IF((oend-op) < 3 /*max nbSeq Size*/ + 1 /*seqHead*/, - dstSize_tooSmall, "Can't fit seq hdr in output buf!"); - if (nbSeq < 128) { - *op++ = (BYTE)nbSeq; - } else if (nbSeq < LONGNBSEQ) { - op[0] = (BYTE)((nbSeq>>8) + 0x80); - op[1] = (BYTE)nbSeq; - op+=2; - } else { - op[0]=0xFF; - MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ)); - op+=3; - } - assert(op <= oend); - if (nbSeq==0) { - /* Copy the old tables over as if we repeated them */ - memcpy(&nextEntropy->fse, &prevEntropy->fse, sizeof(prevEntropy->fse)); - return (size_t)(op - ostart); - } - - /* seqHead : flags for FSE encoding type */ - seqHead = op++; - assert(op <= oend); - - /* convert length/distances into codes */ - ZSTD_seqToCodes(seqStorePtr); - /* build CTable for Literal Lengths */ - { unsigned max = MaxLL; - size_t const mostFrequent = HIST_countFast_wksp(count, &max, llCodeTable, nbSeq, entropyWorkspace, entropyWkspSize); /* can't fail */ - DEBUGLOG(5, "Building LL table"); - nextEntropy->fse.litlength_repeatMode = prevEntropy->fse.litlength_repeatMode; - LLtype = ZSTD_selectEncodingType(&nextEntropy->fse.litlength_repeatMode, - count, max, mostFrequent, nbSeq, - LLFSELog, prevEntropy->fse.litlengthCTable, - ZSTDInternalConstants::LL_defaultNorm, ZSTDInternalConstants::LL_defaultNormLog, - ZSTD_defaultAllowed, strategy); - assert(set_basic < set_compressed && set_rle < set_compressed); - assert(!(LLtype < set_compressed && nextEntropy->fse.litlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */ - { size_t const countSize = ZSTD_buildCTable( - op, (size_t)(oend - op), - CTable_LitLength, LLFSELog, (symbolEncodingType_e)LLtype, - count, max, llCodeTable, nbSeq, - ZSTDInternalConstants::LL_defaultNorm, ZSTDInternalConstants::LL_defaultNormLog, MaxLL, - prevEntropy->fse.litlengthCTable, - sizeof(prevEntropy->fse.litlengthCTable), - entropyWorkspace, entropyWkspSize); - FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for LitLens failed"); - if (LLtype == set_compressed) - lastNCount = op; - op += countSize; - assert(op <= oend); - } } - /* build CTable for Offsets */ - { unsigned max = MaxOff; - size_t const mostFrequent = HIST_countFast_wksp( - count, &max, ofCodeTable, nbSeq, entropyWorkspace, entropyWkspSize); /* can't fail */ - /* We can only use the basic table if max <= DefaultMaxOff, otherwise the offsets are too large */ - ZSTD_defaultPolicy_e const defaultPolicy = (max <= DefaultMaxOff) ? ZSTD_defaultAllowed : ZSTD_defaultDisallowed; - DEBUGLOG(5, "Building OF table"); - nextEntropy->fse.offcode_repeatMode = prevEntropy->fse.offcode_repeatMode; - Offtype = ZSTD_selectEncodingType(&nextEntropy->fse.offcode_repeatMode, - count, max, mostFrequent, nbSeq, - OffFSELog, prevEntropy->fse.offcodeCTable, - ZSTDInternalConstants::OF_defaultNorm, ZSTDInternalConstants::OF_defaultNormLog, - defaultPolicy, strategy); - assert(!(Offtype < set_compressed && nextEntropy->fse.offcode_repeatMode != FSE_repeat_none)); /* We don't copy tables */ - { size_t const countSize = ZSTD_buildCTable( - op, (size_t)(oend - op), - CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)Offtype, - count, max, ofCodeTable, nbSeq, - ZSTDInternalConstants::OF_defaultNorm, ZSTDInternalConstants::OF_defaultNormLog, DefaultMaxOff, - prevEntropy->fse.offcodeCTable, - sizeof(prevEntropy->fse.offcodeCTable), - entropyWorkspace, entropyWkspSize); - FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for Offsets failed"); - if (Offtype == set_compressed) - lastNCount = op; - op += countSize; - assert(op <= oend); - } } - /* build CTable for MatchLengths */ - { unsigned max = MaxML; - size_t const mostFrequent = HIST_countFast_wksp( - count, &max, mlCodeTable, nbSeq, entropyWorkspace, entropyWkspSize); /* can't fail */ - DEBUGLOG(5, "Building ML table (remaining space : %i)", (int)(oend-op)); - nextEntropy->fse.matchlength_repeatMode = prevEntropy->fse.matchlength_repeatMode; - MLtype = ZSTD_selectEncodingType(&nextEntropy->fse.matchlength_repeatMode, - count, max, mostFrequent, nbSeq, - MLFSELog, prevEntropy->fse.matchlengthCTable, - ZSTDInternalConstants::ML_defaultNorm, ZSTDInternalConstants::ML_defaultNormLog, - ZSTD_defaultAllowed, strategy); - assert(!(MLtype < set_compressed && nextEntropy->fse.matchlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */ - { size_t const countSize = ZSTD_buildCTable( - op, (size_t)(oend - op), - CTable_MatchLength, MLFSELog, (symbolEncodingType_e)MLtype, - count, max, mlCodeTable, nbSeq, - ZSTDInternalConstants::ML_defaultNorm, ZSTDInternalConstants::ML_defaultNormLog, MaxML, - prevEntropy->fse.matchlengthCTable, - sizeof(prevEntropy->fse.matchlengthCTable), - entropyWorkspace, entropyWkspSize); - FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for MatchLengths failed"); - if (MLtype == set_compressed) - lastNCount = op; - op += countSize; - assert(op <= oend); - } } - - *seqHead = (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2)); - - { size_t const bitstreamSize = ZSTD_encodeSequences( - op, (size_t)(oend - op), - CTable_MatchLength, mlCodeTable, - CTable_OffsetBits, ofCodeTable, - CTable_LitLength, llCodeTable, - sequences, nbSeq, - longOffsets, bmi2); - FORWARD_IF_ERROR(bitstreamSize, "ZSTD_encodeSequences failed"); - op += bitstreamSize; - assert(op <= oend); - /* zstd versions <= 1.3.4 mistakenly report corruption when - * FSE_readNCount() receives a buffer < 4 bytes. - * Fixed by https://github.com/facebook/zstd/pull/1146. - * This can happen when the last set_compressed table present is 2 - * bytes and the bitstream is only one byte. - * In this exceedingly rare case, we will simply emit an uncompressed - * block, since it isn't worth optimizing. - */ - if (lastNCount && (op - lastNCount) < 4) { - /* NCountSize >= 2 && bitstreamSize > 0 ==> lastCountSize == 3 */ - assert(op - lastNCount == 3); - DEBUGLOG(5, "Avoiding bug in zstd decoder in versions <= 1.3.4 by " - "emitting an uncompressed block."); - return 0; - } - } - - DEBUGLOG(5, "compressed block size : %u", (unsigned)(op - ostart)); - return (size_t)(op - ostart); -} - -MEM_STATIC size_t -ZSTD_compressSequences(seqStore_t* seqStorePtr, - const ZSTD_entropyCTables_t* prevEntropy, - ZSTD_entropyCTables_t* nextEntropy, - const ZSTD_CCtx_params* cctxParams, - void* dst, size_t dstCapacity, - size_t srcSize, - void* entropyWorkspace, size_t entropyWkspSize, - int bmi2) -{ - size_t const cSize = ZSTD_compressSequences_internal( - seqStorePtr, prevEntropy, nextEntropy, cctxParams, - dst, dstCapacity, - entropyWorkspace, entropyWkspSize, bmi2); - if (cSize == 0) return 0; - /* When srcSize <= dstCapacity, there is enough space to write a raw uncompressed block. - * Since we ran out of space, block must be not compressible, so fall back to raw uncompressed block. - */ - if ((cSize == ERROR(dstSize_tooSmall)) & (srcSize <= dstCapacity)) - return 0; /* block not compressed */ - FORWARD_IF_ERROR(cSize, "ZSTD_compressSequences_internal failed"); - - /* Check compressibility */ - { size_t const maxCSize = srcSize - ZSTD_minGain(srcSize, cctxParams->cParams.strategy); - if (cSize >= maxCSize) return 0; /* block not compressed */ - } - - return cSize; -} - -/* ZSTD_selectBlockCompressor() : - * Not static, but internal use only (used by long distance matcher) - * assumption : strat is a valid strategy */ -ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_dictMode_e dictMode) -{ - static const ZSTD_blockCompressor blockCompressor[3][ZSTD_STRATEGY_MAX+1] = { - { ZSTD_compressBlock_fast /* default for 0 */, - ZSTD_compressBlock_fast, - ZSTD_compressBlock_doubleFast, - ZSTD_compressBlock_greedy, - ZSTD_compressBlock_lazy, - ZSTD_compressBlock_lazy2, - ZSTD_compressBlock_btlazy2, - ZSTD_compressBlock_btopt, - ZSTD_compressBlock_btultra, - ZSTD_compressBlock_btultra2 }, - { ZSTD_compressBlock_fast_extDict /* default for 0 */, - ZSTD_compressBlock_fast_extDict, - ZSTD_compressBlock_doubleFast_extDict, - ZSTD_compressBlock_greedy_extDict, - ZSTD_compressBlock_lazy_extDict, - ZSTD_compressBlock_lazy2_extDict, - ZSTD_compressBlock_btlazy2_extDict, - ZSTD_compressBlock_btopt_extDict, - ZSTD_compressBlock_btultra_extDict, - ZSTD_compressBlock_btultra_extDict }, - { ZSTD_compressBlock_fast_dictMatchState /* default for 0 */, - ZSTD_compressBlock_fast_dictMatchState, - ZSTD_compressBlock_doubleFast_dictMatchState, - ZSTD_compressBlock_greedy_dictMatchState, - ZSTD_compressBlock_lazy_dictMatchState, - ZSTD_compressBlock_lazy2_dictMatchState, - ZSTD_compressBlock_btlazy2_dictMatchState, - ZSTD_compressBlock_btopt_dictMatchState, - ZSTD_compressBlock_btultra_dictMatchState, - ZSTD_compressBlock_btultra_dictMatchState } - }; - ZSTD_blockCompressor selectedCompressor; - ZSTD_STATIC_ASSERT((unsigned)ZSTD_fast == 1); - - assert(ZSTD_cParam_withinBounds(ZSTD_c_strategy, strat)); - selectedCompressor = blockCompressor[(int)dictMode][(int)strat]; - assert(selectedCompressor != NULL); - return selectedCompressor; -} - -static void ZSTD_storeLastLiterals(seqStore_t* seqStorePtr, - const BYTE* anchor, size_t lastLLSize) -{ - memcpy(seqStorePtr->lit, anchor, lastLLSize); - seqStorePtr->lit += lastLLSize; -} - -void ZSTD_resetSeqStore(seqStore_t* ssPtr) -{ - ssPtr->lit = ssPtr->litStart; - ssPtr->sequences = ssPtr->sequencesStart; - ssPtr->longLengthID = 0; -} - -typedef enum { ZSTDbss_compress, ZSTDbss_noCompress } ZSTD_buildSeqStore_e; - -static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) -{ - ZSTD_matchState_t* const ms = &zc->blockState.matchState; - DEBUGLOG(5, "ZSTD_buildSeqStore (srcSize=%zu)", srcSize); - assert(srcSize <= ZSTD_BLOCKSIZE_MAX); - /* Assert that we have correctly flushed the ctx params into the ms's copy */ - ZSTD_assertEqualCParams(zc->appliedParams.cParams, ms->cParams); - if (srcSize < MIN_CBLOCK_SIZE+ZSTDInternalConstants::ZSTD_blockHeaderSize+1) { - ZSTD_ldm_skipSequences(&zc->externSeqStore, srcSize, zc->appliedParams.cParams.minMatch); - return ZSTDbss_noCompress; /* don't even attempt compression below a certain srcSize */ - } - ZSTD_resetSeqStore(&(zc->seqStore)); - /* required for optimal parser to read stats from dictionary */ - ms->opt.symbolCosts = &zc->blockState.prevCBlock->entropy; - /* tell the optimal parser how we expect to compress literals */ - ms->opt.literalCompressionMode = zc->appliedParams.literalCompressionMode; - /* a gap between an attached dict and the current window is not safe, - * they must remain adjacent, - * and when that stops being the case, the dict must be unset */ - assert(ms->dictMatchState == NULL || ms->loadedDictEnd == ms->window.dictLimit); - - /* limited update after a very long match */ - { const BYTE* const base = ms->window.base; - const BYTE* const istart = (const BYTE*)src; - const U32 current = (U32)(istart-base); - if (sizeof(ptrdiff_t)==8) assert(istart - base < (ptrdiff_t)(U32)(-1)); /* ensure no overflow */ - if (current > ms->nextToUpdate + 384) - ms->nextToUpdate = current - MIN(192, (U32)(current - ms->nextToUpdate - 384)); - } - - /* select and store sequences */ - { ZSTD_dictMode_e const dictMode = ZSTD_matchState_dictMode(ms); - size_t lastLLSize; - { int i; - for (i = 0; i < ZSTD_REP_NUM; ++i) - zc->blockState.nextCBlock->rep[i] = zc->blockState.prevCBlock->rep[i]; - } - if (zc->externSeqStore.pos < zc->externSeqStore.size) { - assert(!zc->appliedParams.ldmParams.enableLdm); - /* Updates ldmSeqStore.pos */ - lastLLSize = - ZSTD_ldm_blockCompress(&zc->externSeqStore, - ms, &zc->seqStore, - zc->blockState.nextCBlock->rep, - src, srcSize); - assert(zc->externSeqStore.pos <= zc->externSeqStore.size); - } else if (zc->appliedParams.ldmParams.enableLdm) { - rawSeqStore_t ldmSeqStore = {NULL, 0, 0, 0}; - - ldmSeqStore.seq = zc->ldmSequences; - ldmSeqStore.capacity = zc->maxNbLdmSequences; - /* Updates ldmSeqStore.size */ - FORWARD_IF_ERROR(ZSTD_ldm_generateSequences(&zc->ldmState, &ldmSeqStore, - &zc->appliedParams.ldmParams, - src, srcSize), ""); - /* Updates ldmSeqStore.pos */ - lastLLSize = - ZSTD_ldm_blockCompress(&ldmSeqStore, - ms, &zc->seqStore, - zc->blockState.nextCBlock->rep, - src, srcSize); - assert(ldmSeqStore.pos == ldmSeqStore.size); - } else { /* not long range mode */ - ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy, dictMode); - lastLLSize = blockCompressor(ms, &zc->seqStore, zc->blockState.nextCBlock->rep, src, srcSize); - } - { const BYTE* const lastLiterals = (const BYTE*)src + srcSize - lastLLSize; - ZSTD_storeLastLiterals(&zc->seqStore, lastLiterals, lastLLSize); - } } - return ZSTDbss_compress; -} - -static void ZSTD_copyBlockSequences(ZSTD_CCtx* zc) -{ - const seqStore_t* seqStore = ZSTD_getSeqStore(zc); - const seqDef* seqs = seqStore->sequencesStart; - size_t seqsSize = seqStore->sequences - seqs; - - ZSTD_Sequence* outSeqs = &zc->seqCollector.seqStart[zc->seqCollector.seqIndex]; - size_t i; size_t position; int repIdx; - - assert(zc->seqCollector.seqIndex + 1 < zc->seqCollector.maxSequences); - for (i = 0, position = 0; i < seqsSize; ++i) { - outSeqs[i].offset = seqs[i].offset; - outSeqs[i].litLength = seqs[i].litLength; - outSeqs[i].matchLength = seqs[i].matchLength + MINMATCH; - - if (i == seqStore->longLengthPos) { - if (seqStore->longLengthID == 1) { - outSeqs[i].litLength += 0x10000; - } else if (seqStore->longLengthID == 2) { - outSeqs[i].matchLength += 0x10000; - } - } - - if (outSeqs[i].offset <= ZSTD_REP_NUM) { - outSeqs[i].rep = outSeqs[i].offset; - repIdx = (unsigned int)i - outSeqs[i].offset; - - if (outSeqs[i].litLength == 0) { - if (outSeqs[i].offset < 3) { - --repIdx; - } else { - repIdx = (unsigned int)i - 1; - } - ++outSeqs[i].rep; - } - assert(repIdx >= -3); - outSeqs[i].offset = repIdx >= 0 ? outSeqs[repIdx].offset : ZSTDInternalConstants::repStartValue[-repIdx - 1]; - if (outSeqs[i].rep == 4) { - --outSeqs[i].offset; - } - } else { - outSeqs[i].offset -= ZSTD_REP_NUM; - } - - position += outSeqs[i].litLength; - outSeqs[i].matchPos = (unsigned int)position; - position += outSeqs[i].matchLength; - } - zc->seqCollector.seqIndex += seqsSize; -} - -size_t ZSTD_getSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs, - size_t outSeqsSize, const void* src, size_t srcSize) -{ - const size_t dstCapacity = ZSTD_compressBound(srcSize); - void* dst = ZSTD_malloc(dstCapacity, ZSTDInternalConstants::ZSTD_defaultCMem); - SeqCollector seqCollector; - - RETURN_ERROR_IF(dst == NULL, memory_allocation, "NULL pointer!"); - - seqCollector.collectSequences = 1; - seqCollector.seqStart = outSeqs; - seqCollector.seqIndex = 0; - seqCollector.maxSequences = outSeqsSize; - zc->seqCollector = seqCollector; - - ZSTD_compress2(zc, dst, dstCapacity, src, srcSize); - ZSTD_free(dst, ZSTDInternalConstants::ZSTD_defaultCMem); - return zc->seqCollector.seqIndex; -} - -/* Returns true if the given block is a RLE block */ -static int ZSTD_isRLE(const BYTE *ip, size_t length) { - size_t i; - if (length < 2) return 1; - for (i = 1; i < length; ++i) { - if (ip[0] != ip[i]) return 0; - } - return 1; -} - -/* Returns true if the given block may be RLE. - * This is just a heuristic based on the compressibility. - * It may return both false positives and false negatives. - */ -static int ZSTD_maybeRLE(seqStore_t const* seqStore) -{ - size_t const nbSeqs = (size_t)(seqStore->sequences - seqStore->sequencesStart); - size_t const nbLits = (size_t)(seqStore->lit - seqStore->litStart); - - return nbSeqs < 4 && nbLits < 10; -} - -static void ZSTD_confirmRepcodesAndEntropyTables(ZSTD_CCtx* zc) -{ - ZSTD_compressedBlockState_t* const tmp = zc->blockState.prevCBlock; - zc->blockState.prevCBlock = zc->blockState.nextCBlock; - zc->blockState.nextCBlock = tmp; -} - -static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, U32 frame) -{ - /* This the upper bound for the length of an rle block. - * This isn't the actual upper bound. Finding the real threshold - * needs further investigation. - */ - const U32 rleMaxLength = 25; - size_t cSize; - const BYTE* ip = (const BYTE*)src; - BYTE* op = (BYTE*)dst; - DEBUGLOG(5, "ZSTD_compressBlock_internal (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u)", - (unsigned)dstCapacity, (unsigned)zc->blockState.matchState.window.dictLimit, - (unsigned)zc->blockState.matchState.nextToUpdate); - - { const size_t bss = ZSTD_buildSeqStore(zc, src, srcSize); - FORWARD_IF_ERROR(bss, "ZSTD_buildSeqStore failed"); - if (bss == ZSTDbss_noCompress) { cSize = 0; goto out; } - } - - if (zc->seqCollector.collectSequences) { - ZSTD_copyBlockSequences(zc); - return 0; - } - - /* encode sequences and literals */ - cSize = ZSTD_compressSequences(&zc->seqStore, - &zc->blockState.prevCBlock->entropy, &zc->blockState.nextCBlock->entropy, - &zc->appliedParams, - dst, dstCapacity, - srcSize, - zc->entropyWorkspace, HUF_WORKSPACE_SIZE /* statically allocated in resetCCtx */, - zc->bmi2); - - if (frame && - /* We don't want to emit our first block as a RLE even if it qualifies because - * doing so will cause the decoder (cli only) to throw a "should consume all input error." - * This is only an issue for zstd <= v1.4.3 - */ - !zc->isFirstBlock && - cSize < rleMaxLength && - ZSTD_isRLE(ip, srcSize)) - { - cSize = 1; - op[0] = ip[0]; - } - -out: - if (!ZSTD_isError(cSize) && cSize > 1) { - ZSTD_confirmRepcodesAndEntropyTables(zc); - } - /* We check that dictionaries have offset codes available for the first - * block. After the first block, the offcode table might not have large - * enough codes to represent the offsets in the data. - */ - if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid) - zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check; - - return cSize; -} - -static size_t ZSTD_compressBlock_targetCBlockSize_body(ZSTD_CCtx* zc, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - const size_t bss, U32 lastBlock) -{ - DEBUGLOG(6, "Attempting ZSTD_compressSuperBlock()"); - if (bss == ZSTDbss_compress) { - if (/* We don't want to emit our first block as a RLE even if it qualifies because - * doing so will cause the decoder (cli only) to throw a "should consume all input error." - * This is only an issue for zstd <= v1.4.3 - */ - !zc->isFirstBlock && - ZSTD_maybeRLE(&zc->seqStore) && - ZSTD_isRLE((BYTE const*)src, srcSize)) - { - return ZSTD_rleCompressBlock(dst, dstCapacity, *(BYTE const*)src, srcSize, lastBlock); - } - /* Attempt superblock compression. - * - * Note that compressed size of ZSTD_compressSuperBlock() is not bound by the - * standard ZSTD_compressBound(). This is a problem, because even if we have - * space now, taking an extra byte now could cause us to run out of space later - * and violate ZSTD_compressBound(). - * - * Define blockBound(blockSize) = blockSize + ZSTD_blockHeaderSize. - * - * In order to respect ZSTD_compressBound() we must attempt to emit a raw - * uncompressed block in these cases: - * * cSize == 0: Return code for an uncompressed block. - * * cSize == dstSize_tooSmall: We may have expanded beyond blockBound(srcSize). - * ZSTD_noCompressBlock() will return dstSize_tooSmall if we are really out of - * output space. - * * cSize >= blockBound(srcSize): We have expanded the block too much so - * emit an uncompressed block. - */ - { - size_t const cSize = ZSTD_compressSuperBlock(zc, dst, dstCapacity, src, srcSize, lastBlock); - if (cSize != ERROR(dstSize_tooSmall)) { - size_t const maxCSize = srcSize - ZSTD_minGain(srcSize, zc->appliedParams.cParams.strategy); - FORWARD_IF_ERROR(cSize, "ZSTD_compressSuperBlock failed"); - if (cSize != 0 && cSize < maxCSize + ZSTDInternalConstants::ZSTD_blockHeaderSize) { - ZSTD_confirmRepcodesAndEntropyTables(zc); - return cSize; - } - } - } - } - - DEBUGLOG(6, "Resorting to ZSTD_noCompressBlock()"); - /* Superblock compression failed, attempt to emit a single no compress block. - * The decoder will be able to stream this block since it is uncompressed. - */ - return ZSTD_noCompressBlock(dst, dstCapacity, src, srcSize, lastBlock); -} - -static size_t ZSTD_compressBlock_targetCBlockSize(ZSTD_CCtx* zc, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - U32 lastBlock) -{ - size_t cSize = 0; - const size_t bss = ZSTD_buildSeqStore(zc, src, srcSize); - DEBUGLOG(5, "ZSTD_compressBlock_targetCBlockSize (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u, srcSize=%zu)", - (unsigned)dstCapacity, (unsigned)zc->blockState.matchState.window.dictLimit, (unsigned)zc->blockState.matchState.nextToUpdate, srcSize); - FORWARD_IF_ERROR(bss, "ZSTD_buildSeqStore failed"); - - cSize = ZSTD_compressBlock_targetCBlockSize_body(zc, dst, dstCapacity, src, srcSize, bss, lastBlock); - FORWARD_IF_ERROR(cSize, "ZSTD_compressBlock_targetCBlockSize_body failed"); - - if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid) - zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check; - - return cSize; -} - -static void ZSTD_overflowCorrectIfNeeded(ZSTD_matchState_t* ms, - ZSTD_cwksp* ws, - ZSTD_CCtx_params const* params, - void const* ip, - void const* iend) -{ - if (ZSTD_window_needOverflowCorrection(ms->window, iend)) { - U32 const maxDist = (U32)1 << params->cParams.windowLog; - U32 const cycleLog = ZSTD_cycleLog(params->cParams.chainLog, params->cParams.strategy); - U32 const correction = ZSTD_window_correctOverflow(&ms->window, cycleLog, maxDist, ip); - ZSTD_STATIC_ASSERT(ZSTD_CHAINLOG_MAX <= 30); - ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX_32 <= 30); - ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX <= 31); - ZSTD_cwksp_mark_tables_dirty(ws); - ZSTD_reduceIndex(ms, params, correction); - ZSTD_cwksp_mark_tables_clean(ws); - if (ms->nextToUpdate < correction) ms->nextToUpdate = 0; - else ms->nextToUpdate -= correction; - /* invalidate dictionaries on overflow correction */ - ms->loadedDictEnd = 0; - ms->dictMatchState = NULL; - } -} - -/*! ZSTD_compress_frameChunk() : -* Compress a chunk of data into one or multiple blocks. -* All blocks will be terminated, all input will be consumed. -* Function will issue an error if there is not enough `dstCapacity` to hold the compressed content. -* Frame is supposed already started (header already produced) -* @return : compressed size, or an error code -*/ -static size_t ZSTD_compress_frameChunk (ZSTD_CCtx* cctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - U32 lastFrameChunk) -{ - size_t blockSize = cctx->blockSize; - size_t remaining = srcSize; - const BYTE* ip = (const BYTE*)src; - BYTE* const ostart = (BYTE*)dst; - BYTE* op = ostart; - U32 const maxDist = (U32)1 << cctx->appliedParams.cParams.windowLog; - - assert(cctx->appliedParams.cParams.windowLog <= ZSTD_WINDOWLOG_MAX); - - DEBUGLOG(5, "ZSTD_compress_frameChunk (blockSize=%u)", (unsigned)blockSize); - if (cctx->appliedParams.fParams.checksumFlag && srcSize) - XXH64_update(&cctx->xxhState, src, srcSize); - - while (remaining) { - ZSTD_matchState_t* const ms = &cctx->blockState.matchState; - U32 const lastBlock = lastFrameChunk & (blockSize >= remaining); - - RETURN_ERROR_IF(dstCapacity < ZSTDInternalConstants::ZSTD_blockHeaderSize + MIN_CBLOCK_SIZE, - dstSize_tooSmall, - "not enough space to store compressed block"); - if (remaining < blockSize) blockSize = remaining; - - ZSTD_overflowCorrectIfNeeded( - ms, &cctx->workspace, &cctx->appliedParams, ip, ip + blockSize); - ZSTD_checkDictValidity(&ms->window, ip + blockSize, maxDist, &ms->loadedDictEnd, &ms->dictMatchState); - - /* Ensure hash/chain table insertion resumes no sooner than lowlimit */ - if (ms->nextToUpdate < ms->window.lowLimit) ms->nextToUpdate = ms->window.lowLimit; - - { size_t cSize; - if (ZSTD_useTargetCBlockSize(&cctx->appliedParams)) { - cSize = ZSTD_compressBlock_targetCBlockSize(cctx, op, dstCapacity, ip, blockSize, lastBlock); - FORWARD_IF_ERROR(cSize, "ZSTD_compressBlock_targetCBlockSize failed"); - assert(cSize > 0); - assert(cSize <= blockSize + ZSTDInternalConstants::ZSTD_blockHeaderSize); - } else { - cSize = ZSTD_compressBlock_internal(cctx, - op+ZSTDInternalConstants::ZSTD_blockHeaderSize, dstCapacity-ZSTDInternalConstants::ZSTD_blockHeaderSize, - ip, blockSize, 1 /* frame */); - FORWARD_IF_ERROR(cSize, "ZSTD_compressBlock_internal failed"); - - if (cSize == 0) { /* block is not compressible */ - cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize, lastBlock); - FORWARD_IF_ERROR(cSize, "ZSTD_noCompressBlock failed"); - } else { - U32 const cBlockHeader = cSize == 1 ? - lastBlock + (((U32)bt_rle)<<1) + (U32)(blockSize << 3) : - lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3); - MEM_writeLE24(op, cBlockHeader); - cSize += ZSTDInternalConstants::ZSTD_blockHeaderSize; - } - } - - - ip += blockSize; - assert(remaining >= blockSize); - remaining -= blockSize; - op += cSize; - assert(dstCapacity >= cSize); - dstCapacity -= cSize; - cctx->isFirstBlock = 0; - DEBUGLOG(5, "ZSTD_compress_frameChunk: adding a block of size %u", - (unsigned)cSize); - } } - - if (lastFrameChunk && (op>ostart)) cctx->stage = ZSTDcs_ending; - return (size_t)(op-ostart); -} - - -static size_t ZSTD_writeFrameHeader(void* dst, size_t dstCapacity, - const ZSTD_CCtx_params* params, U64 pledgedSrcSize, U32 dictID) -{ BYTE* const op = (BYTE*)dst; - U32 const dictIDSizeCodeLength = (dictID>0) + (dictID>=256) + (dictID>=65536); /* 0-3 */ - U32 const dictIDSizeCode = params->fParams.noDictIDFlag ? 0 : dictIDSizeCodeLength; /* 0-3 */ - U32 const checksumFlag = params->fParams.checksumFlag>0; - U32 const windowSize = (U32)1 << params->cParams.windowLog; - U32 const singleSegment = params->fParams.contentSizeFlag && (windowSize >= pledgedSrcSize); - BYTE const windowLogByte = (BYTE)((params->cParams.windowLog - ZSTD_WINDOWLOG_ABSOLUTEMIN) << 3); - U32 const fcsCode = params->fParams.contentSizeFlag ? - (pledgedSrcSize>=256) + (pledgedSrcSize>=65536+256) + (pledgedSrcSize>=0xFFFFFFFFU) : 0; /* 0-3 */ - BYTE const frameHeaderDescriptionByte = (BYTE)(dictIDSizeCode + (checksumFlag<<2) + (singleSegment<<5) + (fcsCode<<6) ); - size_t pos=0; - - assert(!(params->fParams.contentSizeFlag && pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN)); - RETURN_ERROR_IF(dstCapacity < ZSTD_FRAMEHEADERSIZE_MAX, dstSize_tooSmall, - "dst buf is too small to fit worst-case frame header size."); - DEBUGLOG(4, "ZSTD_writeFrameHeader : dictIDFlag : %u ; dictID : %u ; dictIDSizeCode : %u", - !params->fParams.noDictIDFlag, (unsigned)dictID, (unsigned)dictIDSizeCode); - - if (params->format == ZSTD_f_zstd1) { - MEM_writeLE32(dst, ZSTD_MAGICNUMBER); - pos = 4; - } - op[pos++] = frameHeaderDescriptionByte; - if (!singleSegment) op[pos++] = windowLogByte; - switch(dictIDSizeCode) - { - default: assert(0); /* impossible */ - case 0 : break; - case 1 : op[pos] = (BYTE)(dictID); pos++; break; - case 2 : MEM_writeLE16(op+pos, (U16)dictID); pos+=2; break; - case 3 : MEM_writeLE32(op+pos, dictID); pos+=4; break; - } - switch(fcsCode) - { - default: assert(0); /* impossible */ - case 0 : if (singleSegment) op[pos++] = (BYTE)(pledgedSrcSize); break; - case 1 : MEM_writeLE16(op+pos, (U16)(pledgedSrcSize-256)); pos+=2; break; - case 2 : MEM_writeLE32(op+pos, (U32)(pledgedSrcSize)); pos+=4; break; - case 3 : MEM_writeLE64(op+pos, (U64)(pledgedSrcSize)); pos+=8; break; - } - return pos; -} - -/* ZSTD_writeLastEmptyBlock() : - * output an empty Block with end-of-frame mark to complete a frame - * @return : size of data written into `dst` (== ZSTD_blockHeaderSize (defined in zstd_internal.h)) - * or an error code if `dstCapacity` is too small (stage != ZSTDcs_init, stage_wrong, - "wrong cctx stage"); - RETURN_ERROR_IF(cctx->appliedParams.ldmParams.enableLdm, - parameter_unsupported, - "incompatible with ldm"); - cctx->externSeqStore.seq = seq; - cctx->externSeqStore.size = nbSeq; - cctx->externSeqStore.capacity = nbSeq; - cctx->externSeqStore.pos = 0; - return 0; -} - - -static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* cctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - U32 frame, U32 lastFrameChunk) -{ - ZSTD_matchState_t* const ms = &cctx->blockState.matchState; - size_t fhSize = 0; - - DEBUGLOG(5, "ZSTD_compressContinue_internal, stage: %u, srcSize: %u", - cctx->stage, (unsigned)srcSize); - RETURN_ERROR_IF(cctx->stage==ZSTDcs_created, stage_wrong, - "missing init (ZSTD_compressBegin)"); - - if (frame && (cctx->stage==ZSTDcs_init)) { - fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, &cctx->appliedParams, - cctx->pledgedSrcSizePlusOne-1, cctx->dictID); - FORWARD_IF_ERROR(fhSize, "ZSTD_writeFrameHeader failed"); - assert(fhSize <= dstCapacity); - dstCapacity -= fhSize; - dst = (char*)dst + fhSize; - cctx->stage = ZSTDcs_ongoing; - } - - if (!srcSize) return fhSize; /* do not generate an empty block if no input */ - - if (!ZSTD_window_update(&ms->window, src, srcSize)) { - ms->nextToUpdate = ms->window.dictLimit; - } - if (cctx->appliedParams.ldmParams.enableLdm) { - ZSTD_window_update(&cctx->ldmState.window, src, srcSize); - } - - if (!frame) { - /* overflow check and correction for block mode */ - ZSTD_overflowCorrectIfNeeded( - ms, &cctx->workspace, &cctx->appliedParams, - src, (BYTE const*)src + srcSize); - } - - DEBUGLOG(5, "ZSTD_compressContinue_internal (blockSize=%u)", (unsigned)cctx->blockSize); - { size_t const cSize = frame ? - ZSTD_compress_frameChunk (cctx, dst, dstCapacity, src, srcSize, lastFrameChunk) : - ZSTD_compressBlock_internal (cctx, dst, dstCapacity, src, srcSize, 0 /* frame */); - FORWARD_IF_ERROR(cSize, "%s", frame ? "ZSTD_compress_frameChunk failed" : "ZSTD_compressBlock_internal failed"); - cctx->consumedSrcSize += srcSize; - cctx->producedCSize += (cSize + fhSize); - assert(!(cctx->appliedParams.fParams.contentSizeFlag && cctx->pledgedSrcSizePlusOne == 0)); - if (cctx->pledgedSrcSizePlusOne != 0) { /* control src size */ - ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_UNKNOWN == (unsigned long long)-1); - RETURN_ERROR_IF( - cctx->consumedSrcSize+1 > cctx->pledgedSrcSizePlusOne, - srcSize_wrong, - "error : pledgedSrcSize = %u, while realSrcSize >= %u", - (unsigned)cctx->pledgedSrcSizePlusOne-1, - (unsigned)cctx->consumedSrcSize); - } - return cSize + fhSize; - } -} - -size_t ZSTD_compressContinue (ZSTD_CCtx* cctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize) -{ - DEBUGLOG(5, "ZSTD_compressContinue (srcSize=%u)", (unsigned)srcSize); - return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 1 /* frame mode */, 0 /* last chunk */); -} - - -size_t ZSTD_getBlockSize(const ZSTD_CCtx* cctx) -{ - ZSTD_compressionParameters const cParams = cctx->appliedParams.cParams; - assert(!ZSTD_checkCParams(cParams)); - return MIN (ZSTD_BLOCKSIZE_MAX, (U32)1 << cParams.windowLog); -} - -size_t ZSTD_compressBlock(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize) -{ - DEBUGLOG(5, "ZSTD_compressBlock: srcSize = %u", (unsigned)srcSize); - { size_t const blockSizeMax = ZSTD_getBlockSize(cctx); - RETURN_ERROR_IF(srcSize > blockSizeMax, srcSize_wrong, "input is larger than a block"); } - - return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 0 /* frame mode */, 0 /* last chunk */); -} - -/*! ZSTD_loadDictionaryContent() : - * @return : 0, or an error code - */ -static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms, - ldmState_t* ls, - ZSTD_cwksp* ws, - ZSTD_CCtx_params const* params, - const void* src, size_t srcSize, - ZSTD_dictTableLoadMethod_e dtlm) -{ - const BYTE* ip = (const BYTE*) src; - const BYTE* const iend = ip + srcSize; - - ZSTD_window_update(&ms->window, src, srcSize); - ms->loadedDictEnd = params->forceWindow ? 0 : (U32)(iend - ms->window.base); - - if (params->ldmParams.enableLdm && ls != NULL) { - ZSTD_window_update(&ls->window, src, srcSize); - ls->loadedDictEnd = params->forceWindow ? 0 : (U32)(iend - ls->window.base); - } - - /* Assert that we the ms params match the params we're being given */ - ZSTD_assertEqualCParams(params->cParams, ms->cParams); - - if (srcSize <= HASH_READ_SIZE) return 0; - - while (iend - ip > HASH_READ_SIZE) { - size_t const remaining = (size_t)(iend - ip); - size_t const chunk = MIN(remaining, ZSTD_CHUNKSIZE_MAX); - const BYTE* const ichunk = ip + chunk; - - ZSTD_overflowCorrectIfNeeded(ms, ws, params, ip, ichunk); - - if (params->ldmParams.enableLdm && ls != NULL) - ZSTD_ldm_fillHashTable(ls, (const BYTE*)src, (const BYTE*)src + srcSize, ¶ms->ldmParams); - - switch(params->cParams.strategy) - { - case ZSTD_fast: - ZSTD_fillHashTable(ms, ichunk, dtlm); - break; - case ZSTD_dfast: - ZSTD_fillDoubleHashTable(ms, ichunk, dtlm); - break; - - case ZSTD_greedy: - case ZSTD_lazy: - case ZSTD_lazy2: - if (chunk >= HASH_READ_SIZE) - ZSTD_insertAndFindFirstIndex(ms, ichunk-HASH_READ_SIZE); - break; - - case ZSTD_btlazy2: /* we want the dictionary table fully sorted */ - case ZSTD_btopt: - case ZSTD_btultra: - case ZSTD_btultra2: - if (chunk >= HASH_READ_SIZE) - ZSTD_updateTree(ms, ichunk-HASH_READ_SIZE, ichunk); - break; - - default: - assert(0); /* not possible : not a valid strategy id */ - } - - ip = ichunk; - } - - ms->nextToUpdate = (U32)(iend - ms->window.base); - return 0; -} - - -/* Dictionaries that assign zero probability to symbols that show up causes problems - when FSE encoding. Refuse dictionaries that assign zero probability to symbols - that we may encounter during compression. - NOTE: This behavior is not standard and could be improved in the future. */ -static size_t ZSTD_checkDictNCount(short* normalizedCounter, unsigned dictMaxSymbolValue, unsigned maxSymbolValue) { - U32 s; - RETURN_ERROR_IF(dictMaxSymbolValue < maxSymbolValue, dictionary_corrupted, "dict fse tables don't have all symbols"); - for (s = 0; s <= maxSymbolValue; ++s) { - RETURN_ERROR_IF(normalizedCounter[s] == 0, dictionary_corrupted, "dict fse tables don't have all symbols"); - } - return 0; -} - -size_t ZSTD_loadCEntropy(ZSTD_compressedBlockState_t* bs, void* workspace, - short* offcodeNCount, unsigned* offcodeMaxValue, - const void* const dict, size_t dictSize) -{ - const BYTE* dictPtr = (const BYTE*)dict; /* skip magic num and dict ID */ - const BYTE* const dictEnd = dictPtr + dictSize; - dictPtr += 8; - bs->entropy.huf.repeatMode = HUF_repeat_check; - - { unsigned maxSymbolValue = 255; - unsigned hasZeroWeights = 1; - size_t const hufHeaderSize = HUF_readCTable((HUF_CElt*)bs->entropy.huf.CTable, &maxSymbolValue, dictPtr, - dictEnd-dictPtr, &hasZeroWeights); - - /* We only set the loaded table as valid if it contains all non-zero - * weights. Otherwise, we set it to check */ - if (!hasZeroWeights) - bs->entropy.huf.repeatMode = HUF_repeat_valid; - - RETURN_ERROR_IF(HUF_isError(hufHeaderSize), dictionary_corrupted, ""); - RETURN_ERROR_IF(maxSymbolValue < 255, dictionary_corrupted, ""); - dictPtr += hufHeaderSize; - } - - { unsigned offcodeLog; - size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, offcodeMaxValue, &offcodeLog, dictPtr, dictEnd-dictPtr); - RETURN_ERROR_IF(FSE_isError(offcodeHeaderSize), dictionary_corrupted, ""); - RETURN_ERROR_IF(offcodeLog > OffFSELog, dictionary_corrupted, ""); - /* Defer checking offcodeMaxValue because we need to know the size of the dictionary content */ - /* fill all offset symbols to avoid garbage at end of table */ - RETURN_ERROR_IF(FSE_isError(FSE_buildCTable_wksp( - bs->entropy.fse.offcodeCTable, - offcodeNCount, MaxOff, offcodeLog, - workspace, HUF_WORKSPACE_SIZE)), - dictionary_corrupted, ""); - dictPtr += offcodeHeaderSize; - } - - { short matchlengthNCount[MaxML+1]; - unsigned matchlengthMaxValue = MaxML, matchlengthLog; - size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, dictEnd-dictPtr); - RETURN_ERROR_IF(FSE_isError(matchlengthHeaderSize), dictionary_corrupted, ""); - RETURN_ERROR_IF(matchlengthLog > MLFSELog, dictionary_corrupted, ""); - /* Every match length code must have non-zero probability */ - FORWARD_IF_ERROR( ZSTD_checkDictNCount(matchlengthNCount, matchlengthMaxValue, MaxML), ""); - RETURN_ERROR_IF(FSE_isError(FSE_buildCTable_wksp( - bs->entropy.fse.matchlengthCTable, - matchlengthNCount, matchlengthMaxValue, matchlengthLog, - workspace, HUF_WORKSPACE_SIZE)), - dictionary_corrupted, ""); - dictPtr += matchlengthHeaderSize; - } - - { short litlengthNCount[MaxLL+1]; - unsigned litlengthMaxValue = MaxLL, litlengthLog; - size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, dictEnd-dictPtr); - RETURN_ERROR_IF(FSE_isError(litlengthHeaderSize), dictionary_corrupted, ""); - RETURN_ERROR_IF(litlengthLog > LLFSELog, dictionary_corrupted, ""); - /* Every literal length code must have non-zero probability */ - FORWARD_IF_ERROR( ZSTD_checkDictNCount(litlengthNCount, litlengthMaxValue, MaxLL), ""); - RETURN_ERROR_IF(FSE_isError(FSE_buildCTable_wksp( - bs->entropy.fse.litlengthCTable, - litlengthNCount, litlengthMaxValue, litlengthLog, - workspace, HUF_WORKSPACE_SIZE)), - dictionary_corrupted, ""); - dictPtr += litlengthHeaderSize; - } - - RETURN_ERROR_IF(dictPtr+12 > dictEnd, dictionary_corrupted, ""); - bs->rep[0] = MEM_readLE32(dictPtr+0); - bs->rep[1] = MEM_readLE32(dictPtr+4); - bs->rep[2] = MEM_readLE32(dictPtr+8); - dictPtr += 12; - - return dictPtr - (const BYTE*)dict; -} - -/* Dictionary format : - * See : - * https://github.com/facebook/zstd/blob/master/doc/zstd_compression_format.md#dictionary-format - */ -/*! ZSTD_loadZstdDictionary() : - * @return : dictID, or an error code - * assumptions : magic number supposed already checked - * dictSize supposed >= 8 - */ -static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs, - ZSTD_matchState_t* ms, - ZSTD_cwksp* ws, - ZSTD_CCtx_params const* params, - const void* dict, size_t dictSize, - ZSTD_dictTableLoadMethod_e dtlm, - void* workspace) -{ - const BYTE* dictPtr = (const BYTE*)dict; - const BYTE* const dictEnd = dictPtr + dictSize; - short offcodeNCount[MaxOff+1]; - unsigned offcodeMaxValue = MaxOff; - size_t dictID; - size_t eSize; - - ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<= 8); - assert(MEM_readLE32(dictPtr) == ZSTD_MAGIC_DICTIONARY); - - dictID = params->fParams.noDictIDFlag ? 0 : MEM_readLE32(dictPtr + 4 /* skip magic number */ ); - eSize = ZSTD_loadCEntropy(bs, workspace, offcodeNCount, &offcodeMaxValue, dict, dictSize); - FORWARD_IF_ERROR(eSize, "ZSTD_loadCEntropy failed"); - dictPtr += eSize; - - { size_t const dictContentSize = (size_t)(dictEnd - dictPtr); - U32 offcodeMax = MaxOff; - if (dictContentSize <= ((U32)-1) - 128 KB) { - U32 const maxOffset = (U32)dictContentSize + 128 KB; /* The maximum offset that must be supported */ - offcodeMax = ZSTD_highbit32(maxOffset); /* Calculate minimum offset code required to represent maxOffset */ - } - /* All offset values <= dictContentSize + 128 KB must be representable */ - FORWARD_IF_ERROR(ZSTD_checkDictNCount(offcodeNCount, offcodeMaxValue, MIN(offcodeMax, MaxOff)), ""); - /* All repCodes must be <= dictContentSize and != 0*/ - { U32 u; - for (u=0; u<3; u++) { - RETURN_ERROR_IF(bs->rep[u] == 0, dictionary_corrupted, ""); - RETURN_ERROR_IF(bs->rep[u] > dictContentSize, dictionary_corrupted, ""); - } } - - bs->entropy.fse.offcode_repeatMode = FSE_repeat_valid; - bs->entropy.fse.matchlength_repeatMode = FSE_repeat_valid; - bs->entropy.fse.litlength_repeatMode = FSE_repeat_valid; - FORWARD_IF_ERROR(ZSTD_loadDictionaryContent( - ms, NULL, ws, params, dictPtr, dictContentSize, dtlm), ""); - return dictID; - } -} - -/** ZSTD_compress_insertDictionary() : -* @return : dictID, or an error code */ -static size_t -ZSTD_compress_insertDictionary(ZSTD_compressedBlockState_t* bs, - ZSTD_matchState_t* ms, - ldmState_t* ls, - ZSTD_cwksp* ws, - const ZSTD_CCtx_params* params, - const void* dict, size_t dictSize, - ZSTD_dictContentType_e dictContentType, - ZSTD_dictTableLoadMethod_e dtlm, - void* workspace) -{ - DEBUGLOG(4, "ZSTD_compress_insertDictionary (dictSize=%u)", (U32)dictSize); - if ((dict==NULL) || (dictSize<8)) { - RETURN_ERROR_IF(dictContentType == ZSTD_dct_fullDict, dictionary_wrong, ""); - return 0; - } - - ZSTD_reset_compressedBlockState(bs); - - /* dict restricted modes */ - if (dictContentType == ZSTD_dct_rawContent) - return ZSTD_loadDictionaryContent(ms, ls, ws, params, dict, dictSize, dtlm); - - if (MEM_readLE32(dict) != ZSTD_MAGIC_DICTIONARY) { - if (dictContentType == ZSTD_dct_auto) { - DEBUGLOG(4, "raw content dictionary detected"); - return ZSTD_loadDictionaryContent( - ms, ls, ws, params, dict, dictSize, dtlm); - } - RETURN_ERROR_IF(dictContentType == ZSTD_dct_fullDict, dictionary_wrong, ""); - assert(0); /* impossible */ - } - - /* dict as full zstd dictionary */ - return ZSTD_loadZstdDictionary( - bs, ms, ws, params, dict, dictSize, dtlm, workspace); -} - -#define ZSTD_USE_CDICT_PARAMS_SRCSIZE_CUTOFF (128 KB) -#define ZSTD_USE_CDICT_PARAMS_DICTSIZE_MULTIPLIER (6) - -/*! ZSTD_compressBegin_internal() : - * @return : 0, or an error code */ -static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx, - const void* dict, size_t dictSize, - ZSTD_dictContentType_e dictContentType, - ZSTD_dictTableLoadMethod_e dtlm, - const ZSTD_CDict* cdict, - const ZSTD_CCtx_params* params, U64 pledgedSrcSize, - ZSTD_buffered_policy_e zbuff) -{ - DEBUGLOG(4, "ZSTD_compressBegin_internal: wlog=%u", params->cParams.windowLog); - /* params are supposed to be fully validated at this point */ - assert(!ZSTD_isError(ZSTD_checkCParams(params->cParams))); - assert(!((dict) && (cdict))); /* either dict or cdict, not both */ - if ( (cdict) - && (cdict->dictContentSize > 0) - && ( pledgedSrcSize < ZSTD_USE_CDICT_PARAMS_SRCSIZE_CUTOFF - || pledgedSrcSize < cdict->dictContentSize * ZSTD_USE_CDICT_PARAMS_DICTSIZE_MULTIPLIER - || pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN - || cdict->compressionLevel == 0) - && (params->attachDictPref != ZSTD_dictForceLoad) ) { - return ZSTD_resetCCtx_usingCDict(cctx, cdict, params, pledgedSrcSize, zbuff); - } - - FORWARD_IF_ERROR( ZSTD_resetCCtx_internal(cctx, *params, pledgedSrcSize, - ZSTDcrp_makeClean, zbuff) , ""); - { size_t const dictID = cdict ? - ZSTD_compress_insertDictionary( - cctx->blockState.prevCBlock, &cctx->blockState.matchState, - &cctx->ldmState, &cctx->workspace, &cctx->appliedParams, cdict->dictContent, - cdict->dictContentSize, dictContentType, dtlm, - cctx->entropyWorkspace) - : ZSTD_compress_insertDictionary( - cctx->blockState.prevCBlock, &cctx->blockState.matchState, - &cctx->ldmState, &cctx->workspace, &cctx->appliedParams, dict, dictSize, - dictContentType, dtlm, cctx->entropyWorkspace); - FORWARD_IF_ERROR(dictID, "ZSTD_compress_insertDictionary failed"); - assert(dictID <= UINT_MAX); - cctx->dictID = (U32)dictID; - } - return 0; -} - -size_t ZSTD_compressBegin_advanced_internal(ZSTD_CCtx* cctx, - const void* dict, size_t dictSize, - ZSTD_dictContentType_e dictContentType, - ZSTD_dictTableLoadMethod_e dtlm, - const ZSTD_CDict* cdict, - const ZSTD_CCtx_params* params, - unsigned long long pledgedSrcSize) -{ - DEBUGLOG(4, "ZSTD_compressBegin_advanced_internal: wlog=%u", params->cParams.windowLog); - /* compression parameters verification and optimization */ - FORWARD_IF_ERROR( ZSTD_checkCParams(params->cParams) , ""); - return ZSTD_compressBegin_internal(cctx, - dict, dictSize, dictContentType, dtlm, - cdict, - params, pledgedSrcSize, - ZSTDb_not_buffered); -} - -/*! ZSTD_compressBegin_advanced() : -* @return : 0, or an error code */ -size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, - const void* dict, size_t dictSize, - ZSTD_parameters params, unsigned long long pledgedSrcSize) -{ - ZSTD_CCtx_params const cctxParams = - ZSTD_assignParamsToCCtxParams(&cctx->requestedParams, ¶ms); - return ZSTD_compressBegin_advanced_internal(cctx, - dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast, - NULL /*cdict*/, - &cctxParams, pledgedSrcSize); -} - -size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel) -{ - ZSTD_parameters const params = ZSTD_getParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize); - ZSTD_CCtx_params const cctxParams = - ZSTD_assignParamsToCCtxParams(&cctx->requestedParams, ¶ms); - DEBUGLOG(4, "ZSTD_compressBegin_usingDict (dictSize=%u)", (unsigned)dictSize); - return ZSTD_compressBegin_internal(cctx, dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast, NULL, - &cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, ZSTDb_not_buffered); -} - -size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel) -{ - return ZSTD_compressBegin_usingDict(cctx, NULL, 0, compressionLevel); -} - - -/*! ZSTD_writeEpilogue() : -* Ends a frame. -* @return : nb of bytes written into dst (or an error code) */ -static size_t ZSTD_writeEpilogue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity) -{ - BYTE* const ostart = (BYTE*)dst; - BYTE* op = ostart; - size_t fhSize = 0; - - DEBUGLOG(4, "ZSTD_writeEpilogue"); - RETURN_ERROR_IF(cctx->stage == ZSTDcs_created, stage_wrong, "init missing"); - - /* special case : empty frame */ - if (cctx->stage == ZSTDcs_init) { - fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, &cctx->appliedParams, 0, 0); - FORWARD_IF_ERROR(fhSize, "ZSTD_writeFrameHeader failed"); - dstCapacity -= fhSize; - op += fhSize; - cctx->stage = ZSTDcs_ongoing; - } - - if (cctx->stage != ZSTDcs_ending) { - /* write one last empty block, make it the "last" block */ - U32 const cBlockHeader24 = 1 /* last block */ + (((U32)bt_raw)<<1) + 0; - RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall, "no room for epilogue"); - MEM_writeLE32(op, cBlockHeader24); - op += ZSTDInternalConstants::ZSTD_blockHeaderSize; - dstCapacity -= ZSTDInternalConstants::ZSTD_blockHeaderSize; - } - - if (cctx->appliedParams.fParams.checksumFlag) { - U32 const checksum = (U32) XXH64_digest(&cctx->xxhState); - RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall, "no room for checksum"); - DEBUGLOG(4, "ZSTD_writeEpilogue: write checksum : %08X", (unsigned)checksum); - MEM_writeLE32(op, checksum); - op += 4; - } - - cctx->stage = ZSTDcs_created; /* return to "created but no init" status */ - return op-ostart; -} - -size_t ZSTD_compressEnd (ZSTD_CCtx* cctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize) -{ - size_t endResult; - size_t const cSize = ZSTD_compressContinue_internal(cctx, - dst, dstCapacity, src, srcSize, - 1 /* frame mode */, 1 /* last chunk */); - FORWARD_IF_ERROR(cSize, "ZSTD_compressContinue_internal failed"); - endResult = ZSTD_writeEpilogue(cctx, (char*)dst + cSize, dstCapacity-cSize); - FORWARD_IF_ERROR(endResult, "ZSTD_writeEpilogue failed"); - assert(!(cctx->appliedParams.fParams.contentSizeFlag && cctx->pledgedSrcSizePlusOne == 0)); - if (cctx->pledgedSrcSizePlusOne != 0) { /* control src size */ - ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_UNKNOWN == (unsigned long long)-1); - DEBUGLOG(4, "end of frame : controlling src size"); - RETURN_ERROR_IF( - cctx->pledgedSrcSizePlusOne != cctx->consumedSrcSize+1, - srcSize_wrong, - "error : pledgedSrcSize = %u, while realSrcSize = %u", - (unsigned)cctx->pledgedSrcSizePlusOne-1, - (unsigned)cctx->consumedSrcSize); - } - return cSize + endResult; -} - - -static size_t ZSTD_compress_internal (ZSTD_CCtx* cctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - const void* dict,size_t dictSize, - const ZSTD_parameters* params) -{ - ZSTD_CCtx_params const cctxParams = - ZSTD_assignParamsToCCtxParams(&cctx->requestedParams, params); - DEBUGLOG(4, "ZSTD_compress_internal"); - return ZSTD_compress_advanced_internal(cctx, - dst, dstCapacity, - src, srcSize, - dict, dictSize, - &cctxParams); -} - -size_t ZSTD_compress_advanced (ZSTD_CCtx* cctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - const void* dict,size_t dictSize, - ZSTD_parameters params) -{ - DEBUGLOG(4, "ZSTD_compress_advanced"); - FORWARD_IF_ERROR(ZSTD_checkCParams(params.cParams), ""); - return ZSTD_compress_internal(cctx, - dst, dstCapacity, - src, srcSize, - dict, dictSize, - ¶ms); -} - -/* Internal */ -size_t ZSTD_compress_advanced_internal( - ZSTD_CCtx* cctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - const void* dict,size_t dictSize, - const ZSTD_CCtx_params* params) -{ - DEBUGLOG(4, "ZSTD_compress_advanced_internal (srcSize:%u)", (unsigned)srcSize); - FORWARD_IF_ERROR( ZSTD_compressBegin_internal(cctx, - dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast, NULL, - params, srcSize, ZSTDb_not_buffered) , ""); - return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize); -} - -size_t ZSTD_compress_usingDict(ZSTD_CCtx* cctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - const void* dict, size_t dictSize, - int compressionLevel) -{ - ZSTD_parameters const params = ZSTD_getParams_internal(compressionLevel, srcSize, dict ? dictSize : 0); - ZSTD_CCtx_params cctxParams = ZSTD_assignParamsToCCtxParams(&cctx->requestedParams, ¶ms); - DEBUGLOG(4, "ZSTD_compress_usingDict (srcSize=%u)", (unsigned)srcSize); - assert(params.fParams.contentSizeFlag == 1); - return ZSTD_compress_advanced_internal(cctx, dst, dstCapacity, src, srcSize, dict, dictSize, &cctxParams); -} - -size_t ZSTD_compressCCtx(ZSTD_CCtx* cctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - int compressionLevel) -{ - DEBUGLOG(4, "ZSTD_compressCCtx (srcSize=%u)", (unsigned)srcSize); - assert(cctx != NULL); - return ZSTD_compress_usingDict(cctx, dst, dstCapacity, src, srcSize, NULL, 0, compressionLevel); -} - -size_t ZSTD_compress(void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - int compressionLevel) -{ - size_t result; - ZSTD_CCtx ctxBody; - ZSTD_initCCtx(&ctxBody, ZSTDInternalConstants::ZSTD_defaultCMem); - result = ZSTD_compressCCtx(&ctxBody, dst, dstCapacity, src, srcSize, compressionLevel); - ZSTD_freeCCtxContent(&ctxBody); /* can't free ctxBody itself, as it's on stack; free only heap content */ - return result; -} - - -/* ===== Dictionary API ===== */ - -/*! ZSTD_estimateCDictSize_advanced() : - * Estimate amount of memory that will be needed to create a dictionary with following arguments */ -size_t ZSTD_estimateCDictSize_advanced( - size_t dictSize, ZSTD_compressionParameters cParams, - ZSTD_dictLoadMethod_e dictLoadMethod) -{ - DEBUGLOG(5, "sizeof(ZSTD_CDict) : %u", (unsigned)sizeof(ZSTD_CDict)); - return ZSTD_cwksp_alloc_size(sizeof(ZSTD_CDict)) - + ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE) - + ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 0) - + (dictLoadMethod == ZSTD_dlm_byRef ? 0 - : ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(dictSize, sizeof(void *)))); -} - -size_t ZSTD_estimateCDictSize(size_t dictSize, int compressionLevel) -{ - ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize); - return ZSTD_estimateCDictSize_advanced(dictSize, cParams, ZSTD_dlm_byCopy); -} - -size_t ZSTD_sizeof_CDict(const ZSTD_CDict* cdict) -{ - if (cdict==NULL) return 0; /* support sizeof on NULL */ - DEBUGLOG(5, "sizeof(*cdict) : %u", (unsigned)sizeof(*cdict)); - /* cdict may be in the workspace */ - return (cdict->workspace.workspace == cdict ? 0 : sizeof(*cdict)) - + ZSTD_cwksp_sizeof(&cdict->workspace); -} - -static size_t ZSTD_initCDict_internal( - ZSTD_CDict* cdict, - const void* dictBuffer, size_t dictSize, - ZSTD_dictLoadMethod_e dictLoadMethod, - ZSTD_dictContentType_e dictContentType, - ZSTD_compressionParameters cParams) -{ - DEBUGLOG(3, "ZSTD_initCDict_internal (dictContentType:%u)", (unsigned)dictContentType); - assert(!ZSTD_checkCParams(cParams)); - cdict->matchState.cParams = cParams; - if ((dictLoadMethod == ZSTD_dlm_byRef) || (!dictBuffer) || (!dictSize)) { - cdict->dictContent = dictBuffer; - } else { - void *internalBuffer = ZSTD_cwksp_reserve_object(&cdict->workspace, ZSTD_cwksp_align(dictSize, sizeof(void*))); - RETURN_ERROR_IF(!internalBuffer, memory_allocation, "NULL pointer!"); - cdict->dictContent = internalBuffer; - memcpy(internalBuffer, dictBuffer, dictSize); - } - cdict->dictContentSize = dictSize; - - cdict->entropyWorkspace = (U32*)ZSTD_cwksp_reserve_object(&cdict->workspace, HUF_WORKSPACE_SIZE); - - - /* Reset the state to no dictionary */ - ZSTD_reset_compressedBlockState(&cdict->cBlockState); - FORWARD_IF_ERROR(ZSTD_reset_matchState( - &cdict->matchState, - &cdict->workspace, - &cParams, - ZSTDcrp_makeClean, - ZSTDirp_reset, - ZSTD_resetTarget_CDict), ""); - /* (Maybe) load the dictionary - * Skips loading the dictionary if it is < 8 bytes. - */ - { ZSTD_CCtx_params params; - memset(¶ms, 0, sizeof(params)); - params.compressionLevel = ZSTD_CLEVEL_DEFAULT; - params.fParams.contentSizeFlag = 1; - params.cParams = cParams; - { size_t const dictID = ZSTD_compress_insertDictionary( - &cdict->cBlockState, &cdict->matchState, NULL, &cdict->workspace, - ¶ms, cdict->dictContent, cdict->dictContentSize, - dictContentType, ZSTD_dtlm_full, cdict->entropyWorkspace); - FORWARD_IF_ERROR(dictID, "ZSTD_compress_insertDictionary failed"); - assert(dictID <= (size_t)(U32)-1); - cdict->dictID = (U32)dictID; - } - } - - return 0; -} - -ZSTD_CDict* ZSTD_createCDict_advanced(const void* dictBuffer, size_t dictSize, - ZSTD_dictLoadMethod_e dictLoadMethod, - ZSTD_dictContentType_e dictContentType, - ZSTD_compressionParameters cParams, ZSTD_customMem customMem) -{ - DEBUGLOG(3, "ZSTD_createCDict_advanced, mode %u", (unsigned)dictContentType); - if (!customMem.customAlloc ^ !customMem.customFree) return NULL; - - { size_t const workspaceSize = - ZSTD_cwksp_alloc_size(sizeof(ZSTD_CDict)) + - ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE) + - ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 0) + - (dictLoadMethod == ZSTD_dlm_byRef ? 0 - : ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(dictSize, sizeof(void*)))); - void* const workspace = ZSTD_malloc(workspaceSize, customMem); - ZSTD_cwksp ws; - ZSTD_CDict* cdict; - - if (!workspace) { - ZSTD_free(workspace, customMem); - return NULL; - } - - ZSTD_cwksp_init(&ws, workspace, workspaceSize); - - cdict = (ZSTD_CDict*)ZSTD_cwksp_reserve_object(&ws, sizeof(ZSTD_CDict)); - assert(cdict != NULL); - ZSTD_cwksp_move(&cdict->workspace, &ws); - cdict->customMem = customMem; - cdict->compressionLevel = 0; /* signals advanced API usage */ - - if (ZSTD_isError( ZSTD_initCDict_internal(cdict, - dictBuffer, dictSize, - dictLoadMethod, dictContentType, - cParams) )) { - ZSTD_freeCDict(cdict); - return NULL; - } - - return cdict; - } -} - -ZSTD_CDict* ZSTD_createCDict(const void* dict, size_t dictSize, int compressionLevel) -{ - ZSTD_compressionParameters cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize); - ZSTD_CDict* cdict = ZSTD_createCDict_advanced(dict, dictSize, - ZSTD_dlm_byCopy, ZSTD_dct_auto, - cParams, ZSTDInternalConstants::ZSTD_defaultCMem); - if (cdict) - cdict->compressionLevel = compressionLevel == 0 ? ZSTD_CLEVEL_DEFAULT : compressionLevel; - return cdict; -} - -ZSTD_CDict* ZSTD_createCDict_byReference(const void* dict, size_t dictSize, int compressionLevel) -{ - ZSTD_compressionParameters cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize); - return ZSTD_createCDict_advanced(dict, dictSize, - ZSTD_dlm_byRef, ZSTD_dct_auto, - cParams, ZSTDInternalConstants::ZSTD_defaultCMem); -} - -size_t ZSTD_freeCDict(ZSTD_CDict* cdict) -{ - if (cdict==NULL) return 0; /* support free on NULL */ - { ZSTD_customMem const cMem = cdict->customMem; - int cdictInWorkspace = ZSTD_cwksp_owns_buffer(&cdict->workspace, cdict); - ZSTD_cwksp_free(&cdict->workspace, cMem); - if (!cdictInWorkspace) { - ZSTD_free(cdict, cMem); - } - return 0; - } -} - -/*! ZSTD_initStaticCDict_advanced() : - * Generate a digested dictionary in provided memory area. - * workspace: The memory area to emplace the dictionary into. - * Provided pointer must 8-bytes aligned. - * It must outlive dictionary usage. - * workspaceSize: Use ZSTD_estimateCDictSize() - * to determine how large workspace must be. - * cParams : use ZSTD_getCParams() to transform a compression level - * into its relevants cParams. - * @return : pointer to ZSTD_CDict*, or NULL if error (size too small) - * Note : there is no corresponding "free" function. - * Since workspace was allocated externally, it must be freed externally. - */ -const ZSTD_CDict* ZSTD_initStaticCDict( - void* workspace, size_t workspaceSize, - const void* dict, size_t dictSize, - ZSTD_dictLoadMethod_e dictLoadMethod, - ZSTD_dictContentType_e dictContentType, - ZSTD_compressionParameters cParams) -{ - size_t const matchStateSize = ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 0); - size_t const neededSize = ZSTD_cwksp_alloc_size(sizeof(ZSTD_CDict)) - + (dictLoadMethod == ZSTD_dlm_byRef ? 0 - : ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(dictSize, sizeof(void*)))) - + ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE) - + matchStateSize; - ZSTD_CDict* cdict; - - if ((size_t)workspace & 7) return NULL; /* 8-aligned */ - - { - ZSTD_cwksp ws; - ZSTD_cwksp_init(&ws, workspace, workspaceSize); - cdict = (ZSTD_CDict*)ZSTD_cwksp_reserve_object(&ws, sizeof(ZSTD_CDict)); - if (cdict == NULL) return NULL; - ZSTD_cwksp_move(&cdict->workspace, &ws); - } - - DEBUGLOG(4, "(workspaceSize < neededSize) : (%u < %u) => %u", - (unsigned)workspaceSize, (unsigned)neededSize, (unsigned)(workspaceSize < neededSize)); - if (workspaceSize < neededSize) return NULL; - - if (ZSTD_isError( ZSTD_initCDict_internal(cdict, - dict, dictSize, - dictLoadMethod, dictContentType, - cParams) )) - return NULL; - - return cdict; -} - -ZSTD_compressionParameters ZSTD_getCParamsFromCDict(const ZSTD_CDict* cdict) -{ - assert(cdict != NULL); - return cdict->matchState.cParams; -} - -/* ZSTD_compressBegin_usingCDict_advanced() : - * cdict must be != NULL */ -size_t ZSTD_compressBegin_usingCDict_advanced( - ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict, - ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize) -{ - DEBUGLOG(4, "ZSTD_compressBegin_usingCDict_advanced"); - RETURN_ERROR_IF(cdict==NULL, dictionary_wrong, "NULL pointer!"); - { ZSTD_CCtx_params params = cctx->requestedParams; - params.cParams = ( pledgedSrcSize < ZSTD_USE_CDICT_PARAMS_SRCSIZE_CUTOFF - || pledgedSrcSize < cdict->dictContentSize * ZSTD_USE_CDICT_PARAMS_DICTSIZE_MULTIPLIER - || pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN - || cdict->compressionLevel == 0 ) - && (params.attachDictPref != ZSTD_dictForceLoad) ? - ZSTD_getCParamsFromCDict(cdict) - : ZSTD_getCParams(cdict->compressionLevel, - pledgedSrcSize, - cdict->dictContentSize); - /* Increase window log to fit the entire dictionary and source if the - * source size is known. Limit the increase to 19, which is the - * window log for compression level 1 with the largest source size. - */ - if (pledgedSrcSize != ZSTD_CONTENTSIZE_UNKNOWN) { - U32 const limitedSrcSize = (U32)MIN(pledgedSrcSize, 1U << 19); - U32 const limitedSrcLog = limitedSrcSize > 1 ? ZSTD_highbit32(limitedSrcSize - 1) + 1 : 1; - params.cParams.windowLog = MAX(params.cParams.windowLog, limitedSrcLog); - } - params.fParams = fParams; - return ZSTD_compressBegin_internal(cctx, - NULL, 0, ZSTD_dct_auto, ZSTD_dtlm_fast, - cdict, - ¶ms, pledgedSrcSize, - ZSTDb_not_buffered); - } -} - -/* ZSTD_compressBegin_usingCDict() : - * pledgedSrcSize=0 means "unknown" - * if pledgedSrcSize>0, it will enable contentSizeFlag */ -size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict) -{ - ZSTD_frameParameters const fParams = { 0 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ }; - DEBUGLOG(4, "ZSTD_compressBegin_usingCDict : dictIDFlag == %u", !fParams.noDictIDFlag); - return ZSTD_compressBegin_usingCDict_advanced(cctx, cdict, fParams, ZSTD_CONTENTSIZE_UNKNOWN); -} - -size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - const ZSTD_CDict* cdict, ZSTD_frameParameters fParams) -{ - FORWARD_IF_ERROR(ZSTD_compressBegin_usingCDict_advanced(cctx, cdict, fParams, srcSize), ""); /* will check if cdict != NULL */ - return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize); -} - -/*! ZSTD_compress_usingCDict() : - * Compression using a digested Dictionary. - * Faster startup than ZSTD_compress_usingDict(), recommended when same dictionary is used multiple times. - * Note that compression parameters are decided at CDict creation time - * while frame parameters are hardcoded */ -size_t ZSTD_compress_usingCDict(ZSTD_CCtx* cctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - const ZSTD_CDict* cdict) -{ - ZSTD_frameParameters const fParams = { 1 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ }; - return ZSTD_compress_usingCDict_advanced(cctx, dst, dstCapacity, src, srcSize, cdict, fParams); -} - - - -/* ****************************************************************** -* Streaming -********************************************************************/ - -ZSTD_CStream* ZSTD_createCStream(void) -{ - DEBUGLOG(3, "ZSTD_createCStream"); - return ZSTD_createCStream_advanced(ZSTDInternalConstants::ZSTD_defaultCMem); -} - -ZSTD_CStream* ZSTD_initStaticCStream(void *workspace, size_t workspaceSize) -{ - return ZSTD_initStaticCCtx(workspace, workspaceSize); -} - -ZSTD_CStream* ZSTD_createCStream_advanced(ZSTD_customMem customMem) -{ /* CStream and CCtx are now same object */ - return ZSTD_createCCtx_advanced(customMem); -} - -size_t ZSTD_freeCStream(ZSTD_CStream* zcs) -{ - return ZSTD_freeCCtx(zcs); /* same object */ -} - - - -/*====== Initialization ======*/ - -size_t ZSTD_CStreamInSize(void) { return ZSTD_BLOCKSIZE_MAX; } - -size_t ZSTD_CStreamOutSize(void) -{ - return ZSTD_compressBound(ZSTD_BLOCKSIZE_MAX) + ZSTDInternalConstants::ZSTD_blockHeaderSize + 4 /* 32-bits hash */ ; -} - -static size_t ZSTD_resetCStream_internal(ZSTD_CStream* cctx, - const void* const dict, size_t const dictSize, ZSTD_dictContentType_e const dictContentType, - const ZSTD_CDict* const cdict, - ZSTD_CCtx_params params, unsigned long long const pledgedSrcSize) -{ - DEBUGLOG(4, "ZSTD_resetCStream_internal"); - /* Finalize the compression parameters */ - params.cParams = ZSTD_getCParamsFromCCtxParams(¶ms, pledgedSrcSize, dictSize); - /* params are supposed to be fully validated at this point */ - assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams))); - assert(!((dict) && (cdict))); /* either dict or cdict, not both */ - - FORWARD_IF_ERROR( ZSTD_compressBegin_internal(cctx, - dict, dictSize, dictContentType, ZSTD_dtlm_fast, - cdict, - ¶ms, pledgedSrcSize, - ZSTDb_buffered) , ""); - - cctx->inToCompress = 0; - cctx->inBuffPos = 0; - cctx->inBuffTarget = cctx->blockSize - + (cctx->blockSize == pledgedSrcSize); /* for small input: avoid automatic flush on reaching end of block, since it would require to add a 3-bytes null block to end frame */ - cctx->outBuffContentSize = cctx->outBuffFlushedSize = 0; - cctx->streamStage = zcss_load; - cctx->frameEnded = 0; - return 0; /* ready to go */ -} - -/* ZSTD_resetCStream(): - * pledgedSrcSize == 0 means "unknown" */ -size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pss) -{ - /* temporary : 0 interpreted as "unknown" during transition period. - * Users willing to specify "unknown" **must** use ZSTD_CONTENTSIZE_UNKNOWN. - * 0 will be interpreted as "empty" in the future. - */ - U64 const pledgedSrcSize = (pss==0) ? ZSTD_CONTENTSIZE_UNKNOWN : pss; - DEBUGLOG(4, "ZSTD_resetCStream: pledgedSrcSize = %u", (unsigned)pledgedSrcSize); - FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , ""); - FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , ""); - return 0; -} - -/*! ZSTD_initCStream_internal() : - * Note : for lib/compress only. Used by zstdmt_compress.c. - * Assumption 1 : params are valid - * Assumption 2 : either dict, or cdict, is defined, not both */ -size_t ZSTD_initCStream_internal(ZSTD_CStream* zcs, - const void* dict, size_t dictSize, const ZSTD_CDict* cdict, - const ZSTD_CCtx_params* params, - unsigned long long pledgedSrcSize) -{ - DEBUGLOG(4, "ZSTD_initCStream_internal"); - FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , ""); - FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , ""); - assert(!ZSTD_isError(ZSTD_checkCParams(params->cParams))); - zcs->requestedParams = *params; - assert(!((dict) && (cdict))); /* either dict or cdict, not both */ - if (dict) { - FORWARD_IF_ERROR( ZSTD_CCtx_loadDictionary(zcs, dict, dictSize) , ""); - } else { - /* Dictionary is cleared if !cdict */ - FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, cdict) , ""); - } - return 0; -} - -/* ZSTD_initCStream_usingCDict_advanced() : - * same as ZSTD_initCStream_usingCDict(), with control over frame parameters */ -size_t ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs, - const ZSTD_CDict* cdict, - ZSTD_frameParameters fParams, - unsigned long long pledgedSrcSize) -{ - DEBUGLOG(4, "ZSTD_initCStream_usingCDict_advanced"); - FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , ""); - FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , ""); - zcs->requestedParams.fParams = fParams; - FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, cdict) , ""); - return 0; -} - -/* note : cdict must outlive compression session */ -size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict) -{ - DEBUGLOG(4, "ZSTD_initCStream_usingCDict"); - FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , ""); - FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, cdict) , ""); - return 0; -} - - -/* ZSTD_initCStream_advanced() : - * pledgedSrcSize must be exact. - * if srcSize is not known at init time, use value ZSTD_CONTENTSIZE_UNKNOWN. - * dict is loaded with default parameters ZSTD_dct_auto and ZSTD_dlm_byCopy. */ -size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs, - const void* dict, size_t dictSize, - ZSTD_parameters params, unsigned long long pss) -{ - /* for compatibility with older programs relying on this behavior. - * Users should now specify ZSTD_CONTENTSIZE_UNKNOWN. - * This line will be removed in the future. - */ - U64 const pledgedSrcSize = (pss==0 && params.fParams.contentSizeFlag==0) ? ZSTD_CONTENTSIZE_UNKNOWN : pss; - DEBUGLOG(4, "ZSTD_initCStream_advanced"); - FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , ""); - FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , ""); - FORWARD_IF_ERROR( ZSTD_checkCParams(params.cParams) , ""); - zcs->requestedParams = ZSTD_assignParamsToCCtxParams(&zcs->requestedParams, ¶ms); - FORWARD_IF_ERROR( ZSTD_CCtx_loadDictionary(zcs, dict, dictSize) , ""); - return 0; -} - -size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dict, size_t dictSize, int compressionLevel) -{ - DEBUGLOG(4, "ZSTD_initCStream_usingDict"); - FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , ""); - FORWARD_IF_ERROR( ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel) , ""); - FORWARD_IF_ERROR( ZSTD_CCtx_loadDictionary(zcs, dict, dictSize) , ""); - return 0; -} - -size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, int compressionLevel, unsigned long long pss) -{ - /* temporary : 0 interpreted as "unknown" during transition period. - * Users willing to specify "unknown" **must** use ZSTD_CONTENTSIZE_UNKNOWN. - * 0 will be interpreted as "empty" in the future. - */ - U64 const pledgedSrcSize = (pss==0) ? ZSTD_CONTENTSIZE_UNKNOWN : pss; - DEBUGLOG(4, "ZSTD_initCStream_srcSize"); - FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , ""); - FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, NULL) , ""); - FORWARD_IF_ERROR( ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel) , ""); - FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , ""); - return 0; -} - -size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel) -{ - DEBUGLOG(4, "ZSTD_initCStream"); - FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , ""); - FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, NULL) , ""); - FORWARD_IF_ERROR( ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel) , ""); - return 0; -} - -/*====== Compression ======*/ - -static size_t ZSTD_nextInputSizeHint(const ZSTD_CCtx* cctx) -{ - size_t hintInSize = cctx->inBuffTarget - cctx->inBuffPos; - if (hintInSize==0) hintInSize = cctx->blockSize; - return hintInSize; -} - -/** ZSTD_compressStream_generic(): - * internal function for all *compressStream*() variants - * non-static, because can be called from zstdmt_compress.c - * @return : hint size for next input */ -static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs, - ZSTD_outBuffer* output, - ZSTD_inBuffer* input, - ZSTD_EndDirective const flushMode) -{ - const char* const istart = (const char*)input->src; - const char* const iend = input->size != 0 ? istart + input->size : istart; - const char* ip = input->pos != 0 ? istart + input->pos : istart; - char* const ostart = (char*)output->dst; - char* const oend = output->size != 0 ? ostart + output->size : ostart; - char* op = output->pos != 0 ? ostart + output->pos : ostart; - U32 someMoreWork = 1; - - /* check expectations */ - DEBUGLOG(5, "ZSTD_compressStream_generic, flush=%u", (unsigned)flushMode); - assert(zcs->inBuff != NULL); - assert(zcs->inBuffSize > 0); - assert(zcs->outBuff != NULL); - assert(zcs->outBuffSize > 0); - assert(output->pos <= output->size); - assert(input->pos <= input->size); - - while (someMoreWork) { - switch(zcs->streamStage) - { - case zcss_init: - RETURN_ERROR(init_missing, "call ZSTD_initCStream() first!"); - - case zcss_load: - if ( (flushMode == ZSTD_e_end) - && ((size_t)(oend-op) >= ZSTD_compressBound(iend-ip)) /* enough dstCapacity */ - && (zcs->inBuffPos == 0) ) { - /* shortcut to compression pass directly into output buffer */ - size_t const cSize = ZSTD_compressEnd(zcs, - op, oend-op, ip, iend-ip); - DEBUGLOG(4, "ZSTD_compressEnd : cSize=%u", (unsigned)cSize); - FORWARD_IF_ERROR(cSize, "ZSTD_compressEnd failed"); - ip = iend; - op += cSize; - zcs->frameEnded = 1; - ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); - someMoreWork = 0; break; - } - /* complete loading into inBuffer */ - { size_t const toLoad = zcs->inBuffTarget - zcs->inBuffPos; - size_t const loaded = ZSTD_limitCopy( - zcs->inBuff + zcs->inBuffPos, toLoad, - ip, iend-ip); - zcs->inBuffPos += loaded; - if (loaded != 0) - ip += loaded; - if ( (flushMode == ZSTD_e_continue) - && (zcs->inBuffPos < zcs->inBuffTarget) ) { - /* not enough input to fill full block : stop here */ - someMoreWork = 0; break; - } - if ( (flushMode == ZSTD_e_flush) - && (zcs->inBuffPos == zcs->inToCompress) ) { - /* empty */ - someMoreWork = 0; break; - } - } - /* compress current block (note : this stage cannot be stopped in the middle) */ - DEBUGLOG(5, "stream compression stage (flushMode==%u)", flushMode); - { void* cDst; - size_t cSize; - size_t const iSize = zcs->inBuffPos - zcs->inToCompress; - size_t oSize = oend-op; - unsigned const lastBlock = (flushMode == ZSTD_e_end) && (ip==iend); - if (oSize >= ZSTD_compressBound(iSize)) - cDst = op; /* compress into output buffer, to skip flush stage */ - else - cDst = zcs->outBuff, oSize = zcs->outBuffSize; - cSize = lastBlock ? - ZSTD_compressEnd(zcs, cDst, oSize, - zcs->inBuff + zcs->inToCompress, iSize) : - ZSTD_compressContinue(zcs, cDst, oSize, - zcs->inBuff + zcs->inToCompress, iSize); - FORWARD_IF_ERROR(cSize, "%s", lastBlock ? "ZSTD_compressEnd failed" : "ZSTD_compressContinue failed"); - zcs->frameEnded = lastBlock; - /* prepare next block */ - zcs->inBuffTarget = zcs->inBuffPos + zcs->blockSize; - if (zcs->inBuffTarget > zcs->inBuffSize) - zcs->inBuffPos = 0, zcs->inBuffTarget = zcs->blockSize; - DEBUGLOG(5, "inBuffTarget:%u / inBuffSize:%u", - (unsigned)zcs->inBuffTarget, (unsigned)zcs->inBuffSize); - if (!lastBlock) - assert(zcs->inBuffTarget <= zcs->inBuffSize); - zcs->inToCompress = zcs->inBuffPos; - if (cDst == op) { /* no need to flush */ - op += cSize; - if (zcs->frameEnded) { - DEBUGLOG(5, "Frame completed directly in outBuffer"); - someMoreWork = 0; - ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); - } - break; - } - zcs->outBuffContentSize = cSize; - zcs->outBuffFlushedSize = 0; - zcs->streamStage = zcss_flush; /* pass-through to flush stage */ - } - /* fall-through */ - case zcss_flush: - DEBUGLOG(5, "flush stage"); - { size_t const toFlush = zcs->outBuffContentSize - zcs->outBuffFlushedSize; - size_t const flushed = ZSTD_limitCopy(op, (size_t)(oend-op), - zcs->outBuff + zcs->outBuffFlushedSize, toFlush); - DEBUGLOG(5, "toFlush: %u into %u ==> flushed: %u", - (unsigned)toFlush, (unsigned)(oend-op), (unsigned)flushed); - if (flushed) - op += flushed; - zcs->outBuffFlushedSize += flushed; - if (toFlush!=flushed) { - /* flush not fully completed, presumably because dst is too small */ - assert(op==oend); - someMoreWork = 0; - break; - } - zcs->outBuffContentSize = zcs->outBuffFlushedSize = 0; - if (zcs->frameEnded) { - DEBUGLOG(5, "Frame completed on flush"); - someMoreWork = 0; - ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); - break; - } - zcs->streamStage = zcss_load; - break; - } - - default: /* impossible */ - assert(0); - } - } - - input->pos = ip - istart; - output->pos = op - ostart; - if (zcs->frameEnded) return 0; - return ZSTD_nextInputSizeHint(zcs); -} - -static size_t ZSTD_nextInputSizeHint_MTorST(const ZSTD_CCtx* cctx) -{ -#ifdef ZSTD_MULTITHREAD - if (cctx->appliedParams.nbWorkers >= 1) { - assert(cctx->mtctx != NULL); - return ZSTDMT_nextInputSizeHint(cctx->mtctx); - } -#endif - return ZSTD_nextInputSizeHint(cctx); - -} - -size_t ZSTD_compressStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input) -{ - FORWARD_IF_ERROR( ZSTD_compressStream2(zcs, output, input, ZSTD_e_continue) , ""); - return ZSTD_nextInputSizeHint_MTorST(zcs); -} - - -size_t ZSTD_compressStream2( ZSTD_CCtx* cctx, - ZSTD_outBuffer* output, - ZSTD_inBuffer* input, - ZSTD_EndDirective endOp) -{ - DEBUGLOG(5, "ZSTD_compressStream2, endOp=%u ", (unsigned)endOp); - /* check conditions */ - RETURN_ERROR_IF(output->pos > output->size, GENERIC, "invalid buffer"); - RETURN_ERROR_IF(input->pos > input->size, GENERIC, "invalid buffer"); - assert(cctx!=NULL); - - /* transparent initialization stage */ - if (cctx->streamStage == zcss_init) { - ZSTD_CCtx_params params = cctx->requestedParams; - ZSTD_prefixDict const prefixDict = cctx->prefixDict; - FORWARD_IF_ERROR( ZSTD_initLocalDict(cctx) , ""); /* Init the local dict if present. */ - memset(&cctx->prefixDict, 0, sizeof(cctx->prefixDict)); /* single usage */ - assert(prefixDict.dict==NULL || cctx->cdict==NULL); /* only one can be set */ - DEBUGLOG(4, "ZSTD_compressStream2 : transparent init stage"); - if (endOp == ZSTD_e_end) cctx->pledgedSrcSizePlusOne = input->size + 1; /* auto-fix pledgedSrcSize */ - params.cParams = ZSTD_getCParamsFromCCtxParams( - &cctx->requestedParams, cctx->pledgedSrcSizePlusOne-1, 0 /*dictSize*/); - - -#ifdef ZSTD_MULTITHREAD - if ((cctx->pledgedSrcSizePlusOne-1) <= ZSTDMT_JOBSIZE_MIN) { - params.nbWorkers = 0; /* do not invoke multi-threading when src size is too small */ - } - if (params.nbWorkers > 0) { - /* mt context creation */ - if (cctx->mtctx == NULL) { - DEBUGLOG(4, "ZSTD_compressStream2: creating new mtctx for nbWorkers=%u", - params.nbWorkers); - cctx->mtctx = ZSTDMT_createCCtx_advanced((U32)params.nbWorkers, cctx->customMem); - RETURN_ERROR_IF(cctx->mtctx == NULL, memory_allocation, "NULL pointer!"); - } - /* mt compression */ - DEBUGLOG(4, "call ZSTDMT_initCStream_internal as nbWorkers=%u", params.nbWorkers); - FORWARD_IF_ERROR( ZSTDMT_initCStream_internal( - cctx->mtctx, - prefixDict.dict, prefixDict.dictSize, prefixDict.dictContentType, - cctx->cdict, params, cctx->pledgedSrcSizePlusOne-1) , ""); - cctx->streamStage = zcss_load; - cctx->appliedParams.nbWorkers = params.nbWorkers; - } else -#endif - { FORWARD_IF_ERROR( ZSTD_resetCStream_internal(cctx, - prefixDict.dict, prefixDict.dictSize, prefixDict.dictContentType, - cctx->cdict, - params, cctx->pledgedSrcSizePlusOne-1) , ""); - assert(cctx->streamStage == zcss_load); - assert(cctx->appliedParams.nbWorkers == 0); - } } - /* end of transparent initialization stage */ - - /* compression stage */ -#ifdef ZSTD_MULTITHREAD - if (cctx->appliedParams.nbWorkers > 0) { - int const forceMaxProgress = (endOp == ZSTD_e_flush || endOp == ZSTD_e_end); - size_t flushMin; - assert(forceMaxProgress || endOp == ZSTD_e_continue /* Protection for a new flush type */); - if (cctx->cParamsChanged) { - ZSTDMT_updateCParams_whileCompressing(cctx->mtctx, &cctx->requestedParams); - cctx->cParamsChanged = 0; - } - do { - flushMin = ZSTDMT_compressStream_generic(cctx->mtctx, output, input, endOp); - if ( ZSTD_isError(flushMin) - || (endOp == ZSTD_e_end && flushMin == 0) ) { /* compression completed */ - ZSTD_CCtx_reset(cctx, ZSTD_reset_session_only); - } - FORWARD_IF_ERROR(flushMin, "ZSTDMT_compressStream_generic failed"); - } while (forceMaxProgress && flushMin != 0 && output->pos < output->size); - DEBUGLOG(5, "completed ZSTD_compressStream2 delegating to ZSTDMT_compressStream_generic"); - /* Either we don't require maximum forward progress, we've finished the - * flush, or we are out of output space. - */ - assert(!forceMaxProgress || flushMin == 0 || output->pos == output->size); - return flushMin; - } -#endif - FORWARD_IF_ERROR( ZSTD_compressStream_generic(cctx, output, input, endOp) , ""); - DEBUGLOG(5, "completed ZSTD_compressStream2"); - return cctx->outBuffContentSize - cctx->outBuffFlushedSize; /* remaining to flush */ -} - -size_t ZSTD_compressStream2_simpleArgs ( - ZSTD_CCtx* cctx, - void* dst, size_t dstCapacity, size_t* dstPos, - const void* src, size_t srcSize, size_t* srcPos, - ZSTD_EndDirective endOp) -{ - ZSTD_outBuffer output = { dst, dstCapacity, *dstPos }; - ZSTD_inBuffer input = { src, srcSize, *srcPos }; - /* ZSTD_compressStream2() will check validity of dstPos and srcPos */ - size_t const cErr = ZSTD_compressStream2(cctx, &output, &input, endOp); - *dstPos = output.pos; - *srcPos = input.pos; - return cErr; -} - -size_t ZSTD_compress2(ZSTD_CCtx* cctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize) -{ - DEBUGLOG(4, "ZSTD_compress2 (srcSize=%u)", (unsigned)srcSize); - ZSTD_CCtx_reset(cctx, ZSTD_reset_session_only); - { size_t oPos = 0; - size_t iPos = 0; - size_t const result = ZSTD_compressStream2_simpleArgs(cctx, - dst, dstCapacity, &oPos, - src, srcSize, &iPos, - ZSTD_e_end); - FORWARD_IF_ERROR(result, "ZSTD_compressStream2_simpleArgs failed"); - if (result != 0) { /* compression not completed, due to lack of output space */ - assert(oPos == dstCapacity); - RETURN_ERROR(dstSize_tooSmall, ""); - } - assert(iPos == srcSize); /* all input is expected consumed */ - return oPos; - } -} - -/*====== Finalize ======*/ - -/*! ZSTD_flushStream() : - * @return : amount of data remaining to flush */ -size_t ZSTD_flushStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output) -{ - ZSTD_inBuffer input = { NULL, 0, 0 }; - return ZSTD_compressStream2(zcs, output, &input, ZSTD_e_flush); -} - - -size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output) -{ - ZSTD_inBuffer input = { NULL, 0, 0 }; - size_t const remainingToFlush = ZSTD_compressStream2(zcs, output, &input, ZSTD_e_end); - FORWARD_IF_ERROR( remainingToFlush , "ZSTD_compressStream2 failed"); - if (zcs->appliedParams.nbWorkers > 0) return remainingToFlush; /* minimal estimation */ - /* single thread mode : attempt to calculate remaining to flush more precisely */ - { size_t const lastBlockSize = zcs->frameEnded ? 0 : ZSTD_BLOCKHEADERSIZE; - size_t const checksumSize = (size_t)(zcs->frameEnded ? 0 : zcs->appliedParams.fParams.checksumFlag * 4); - size_t const toFlush = remainingToFlush + lastBlockSize + checksumSize; - DEBUGLOG(4, "ZSTD_endStream : remaining to flush : %u", (unsigned)toFlush); - return toFlush; - } -} - - -/*-===== Pre-defined compression levels =====-*/ - -#define ZSTD_MAX_CLEVEL 22 -int ZSTD_maxCLevel(void) { return ZSTD_MAX_CLEVEL; } -int ZSTD_minCLevel(void) { return (int)-ZSTD_TARGETLENGTH_MAX; } - -static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEVEL+1] = { -{ /* "default" - for any srcSize > 256 KB */ - /* W, C, H, S, L, TL, strat */ - { 19, 12, 13, 1, 6, 1, ZSTD_fast }, /* base for negative levels */ - { 19, 13, 14, 1, 7, 0, ZSTD_fast }, /* level 1 */ - { 20, 15, 16, 1, 6, 0, ZSTD_fast }, /* level 2 */ - { 21, 16, 17, 1, 5, 0, ZSTD_dfast }, /* level 3 */ - { 21, 18, 18, 1, 5, 0, ZSTD_dfast }, /* level 4 */ - { 21, 18, 19, 2, 5, 2, ZSTD_greedy }, /* level 5 */ - { 21, 19, 19, 3, 5, 4, ZSTD_greedy }, /* level 6 */ - { 21, 19, 19, 3, 5, 8, ZSTD_lazy }, /* level 7 */ - { 21, 19, 19, 3, 5, 16, ZSTD_lazy2 }, /* level 8 */ - { 21, 19, 20, 4, 5, 16, ZSTD_lazy2 }, /* level 9 */ - { 22, 20, 21, 4, 5, 16, ZSTD_lazy2 }, /* level 10 */ - { 22, 21, 22, 4, 5, 16, ZSTD_lazy2 }, /* level 11 */ - { 22, 21, 22, 5, 5, 16, ZSTD_lazy2 }, /* level 12 */ - { 22, 21, 22, 5, 5, 32, ZSTD_btlazy2 }, /* level 13 */ - { 22, 22, 23, 5, 5, 32, ZSTD_btlazy2 }, /* level 14 */ - { 22, 23, 23, 6, 5, 32, ZSTD_btlazy2 }, /* level 15 */ - { 22, 22, 22, 5, 5, 48, ZSTD_btopt }, /* level 16 */ - { 23, 23, 22, 5, 4, 64, ZSTD_btopt }, /* level 17 */ - { 23, 23, 22, 6, 3, 64, ZSTD_btultra }, /* level 18 */ - { 23, 24, 22, 7, 3,256, ZSTD_btultra2}, /* level 19 */ - { 25, 25, 23, 7, 3,256, ZSTD_btultra2}, /* level 20 */ - { 26, 26, 24, 7, 3,512, ZSTD_btultra2}, /* level 21 */ - { 27, 27, 25, 9, 3,999, ZSTD_btultra2}, /* level 22 */ -}, -{ /* for srcSize <= 256 KB */ - /* W, C, H, S, L, T, strat */ - { 18, 12, 13, 1, 5, 1, ZSTD_fast }, /* base for negative levels */ - { 18, 13, 14, 1, 6, 0, ZSTD_fast }, /* level 1 */ - { 18, 14, 14, 1, 5, 0, ZSTD_dfast }, /* level 2 */ - { 18, 16, 16, 1, 4, 0, ZSTD_dfast }, /* level 3 */ - { 18, 16, 17, 2, 5, 2, ZSTD_greedy }, /* level 4.*/ - { 18, 18, 18, 3, 5, 2, ZSTD_greedy }, /* level 5.*/ - { 18, 18, 19, 3, 5, 4, ZSTD_lazy }, /* level 6.*/ - { 18, 18, 19, 4, 4, 4, ZSTD_lazy }, /* level 7 */ - { 18, 18, 19, 4, 4, 8, ZSTD_lazy2 }, /* level 8 */ - { 18, 18, 19, 5, 4, 8, ZSTD_lazy2 }, /* level 9 */ - { 18, 18, 19, 6, 4, 8, ZSTD_lazy2 }, /* level 10 */ - { 18, 18, 19, 5, 4, 12, ZSTD_btlazy2 }, /* level 11.*/ - { 18, 19, 19, 7, 4, 12, ZSTD_btlazy2 }, /* level 12.*/ - { 18, 18, 19, 4, 4, 16, ZSTD_btopt }, /* level 13 */ - { 18, 18, 19, 4, 3, 32, ZSTD_btopt }, /* level 14.*/ - { 18, 18, 19, 6, 3,128, ZSTD_btopt }, /* level 15.*/ - { 18, 19, 19, 6, 3,128, ZSTD_btultra }, /* level 16.*/ - { 18, 19, 19, 8, 3,256, ZSTD_btultra }, /* level 17.*/ - { 18, 19, 19, 6, 3,128, ZSTD_btultra2}, /* level 18.*/ - { 18, 19, 19, 8, 3,256, ZSTD_btultra2}, /* level 19.*/ - { 18, 19, 19, 10, 3,512, ZSTD_btultra2}, /* level 20.*/ - { 18, 19, 19, 12, 3,512, ZSTD_btultra2}, /* level 21.*/ - { 18, 19, 19, 13, 3,999, ZSTD_btultra2}, /* level 22.*/ -}, -{ /* for srcSize <= 128 KB */ - /* W, C, H, S, L, T, strat */ - { 17, 12, 12, 1, 5, 1, ZSTD_fast }, /* base for negative levels */ - { 17, 12, 13, 1, 6, 0, ZSTD_fast }, /* level 1 */ - { 17, 13, 15, 1, 5, 0, ZSTD_fast }, /* level 2 */ - { 17, 15, 16, 2, 5, 0, ZSTD_dfast }, /* level 3 */ - { 17, 17, 17, 2, 4, 0, ZSTD_dfast }, /* level 4 */ - { 17, 16, 17, 3, 4, 2, ZSTD_greedy }, /* level 5 */ - { 17, 17, 17, 3, 4, 4, ZSTD_lazy }, /* level 6 */ - { 17, 17, 17, 3, 4, 8, ZSTD_lazy2 }, /* level 7 */ - { 17, 17, 17, 4, 4, 8, ZSTD_lazy2 }, /* level 8 */ - { 17, 17, 17, 5, 4, 8, ZSTD_lazy2 }, /* level 9 */ - { 17, 17, 17, 6, 4, 8, ZSTD_lazy2 }, /* level 10 */ - { 17, 17, 17, 5, 4, 8, ZSTD_btlazy2 }, /* level 11 */ - { 17, 18, 17, 7, 4, 12, ZSTD_btlazy2 }, /* level 12 */ - { 17, 18, 17, 3, 4, 12, ZSTD_btopt }, /* level 13.*/ - { 17, 18, 17, 4, 3, 32, ZSTD_btopt }, /* level 14.*/ - { 17, 18, 17, 6, 3,256, ZSTD_btopt }, /* level 15.*/ - { 17, 18, 17, 6, 3,128, ZSTD_btultra }, /* level 16.*/ - { 17, 18, 17, 8, 3,256, ZSTD_btultra }, /* level 17.*/ - { 17, 18, 17, 10, 3,512, ZSTD_btultra }, /* level 18.*/ - { 17, 18, 17, 5, 3,256, ZSTD_btultra2}, /* level 19.*/ - { 17, 18, 17, 7, 3,512, ZSTD_btultra2}, /* level 20.*/ - { 17, 18, 17, 9, 3,512, ZSTD_btultra2}, /* level 21.*/ - { 17, 18, 17, 11, 3,999, ZSTD_btultra2}, /* level 22.*/ -}, -{ /* for srcSize <= 16 KB */ - /* W, C, H, S, L, T, strat */ - { 14, 12, 13, 1, 5, 1, ZSTD_fast }, /* base for negative levels */ - { 14, 14, 15, 1, 5, 0, ZSTD_fast }, /* level 1 */ - { 14, 14, 15, 1, 4, 0, ZSTD_fast }, /* level 2 */ - { 14, 14, 15, 2, 4, 0, ZSTD_dfast }, /* level 3 */ - { 14, 14, 14, 4, 4, 2, ZSTD_greedy }, /* level 4 */ - { 14, 14, 14, 3, 4, 4, ZSTD_lazy }, /* level 5.*/ - { 14, 14, 14, 4, 4, 8, ZSTD_lazy2 }, /* level 6 */ - { 14, 14, 14, 6, 4, 8, ZSTD_lazy2 }, /* level 7 */ - { 14, 14, 14, 8, 4, 8, ZSTD_lazy2 }, /* level 8.*/ - { 14, 15, 14, 5, 4, 8, ZSTD_btlazy2 }, /* level 9.*/ - { 14, 15, 14, 9, 4, 8, ZSTD_btlazy2 }, /* level 10.*/ - { 14, 15, 14, 3, 4, 12, ZSTD_btopt }, /* level 11.*/ - { 14, 15, 14, 4, 3, 24, ZSTD_btopt }, /* level 12.*/ - { 14, 15, 14, 5, 3, 32, ZSTD_btultra }, /* level 13.*/ - { 14, 15, 15, 6, 3, 64, ZSTD_btultra }, /* level 14.*/ - { 14, 15, 15, 7, 3,256, ZSTD_btultra }, /* level 15.*/ - { 14, 15, 15, 5, 3, 48, ZSTD_btultra2}, /* level 16.*/ - { 14, 15, 15, 6, 3,128, ZSTD_btultra2}, /* level 17.*/ - { 14, 15, 15, 7, 3,256, ZSTD_btultra2}, /* level 18.*/ - { 14, 15, 15, 8, 3,256, ZSTD_btultra2}, /* level 19.*/ - { 14, 15, 15, 8, 3,512, ZSTD_btultra2}, /* level 20.*/ - { 14, 15, 15, 9, 3,512, ZSTD_btultra2}, /* level 21.*/ - { 14, 15, 15, 10, 3,999, ZSTD_btultra2}, /* level 22.*/ -}, -}; - -/*! ZSTD_getCParams_internal() : - * @return ZSTD_compressionParameters structure for a selected compression level, srcSize and dictSize. - * Note: srcSizeHint 0 means 0, use ZSTD_CONTENTSIZE_UNKNOWN for unknown. - * Use dictSize == 0 for unknown or unused. */ -static ZSTD_compressionParameters ZSTD_getCParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize) -{ - int const unknown = srcSizeHint == ZSTD_CONTENTSIZE_UNKNOWN; - size_t const addedSize = unknown && dictSize > 0 ? 500 : 0; - U64 const rSize = unknown && dictSize == 0 ? ZSTD_CONTENTSIZE_UNKNOWN : srcSizeHint+dictSize+addedSize; - U32 const tableID = (rSize <= 256 KB) + (rSize <= 128 KB) + (rSize <= 16 KB); - int row = compressionLevel; - DEBUGLOG(5, "ZSTD_getCParams_internal (cLevel=%i)", compressionLevel); - if (compressionLevel == 0) row = ZSTD_CLEVEL_DEFAULT; /* 0 == default */ - if (compressionLevel < 0) row = 0; /* entry 0 is baseline for fast mode */ - if (compressionLevel > ZSTD_MAX_CLEVEL) row = ZSTD_MAX_CLEVEL; - { ZSTD_compressionParameters cp = ZSTD_defaultCParameters[tableID][row]; - if (compressionLevel < 0) cp.targetLength = (unsigned)(-compressionLevel); /* acceleration factor */ - /* refine parameters based on srcSize & dictSize */ - return ZSTD_adjustCParams_internal(cp, srcSizeHint, dictSize); - } -} - -/*! ZSTD_getCParams() : - * @return ZSTD_compressionParameters structure for a selected compression level, srcSize and dictSize. - * Size values are optional, provide 0 if not known or unused */ -ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize) -{ - if (srcSizeHint == 0) srcSizeHint = ZSTD_CONTENTSIZE_UNKNOWN; - return ZSTD_getCParams_internal(compressionLevel, srcSizeHint, dictSize); -} - -/*! ZSTD_getParams() : - * same idea as ZSTD_getCParams() - * @return a `ZSTD_parameters` structure (instead of `ZSTD_compressionParameters`). - * Fields of `ZSTD_frameParameters` are set to default values */ -static ZSTD_parameters ZSTD_getParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize) { - ZSTD_parameters params; - ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, srcSizeHint, dictSize); - DEBUGLOG(5, "ZSTD_getParams (cLevel=%i)", compressionLevel); - memset(¶ms, 0, sizeof(params)); - params.cParams = cParams; - params.fParams.contentSizeFlag = 1; - return params; -} - -/*! ZSTD_getParams() : - * same idea as ZSTD_getCParams() - * @return a `ZSTD_parameters` structure (instead of `ZSTD_compressionParameters`). - * Fields of `ZSTD_frameParameters` are set to default values */ -ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize) { - if (srcSizeHint == 0) srcSizeHint = ZSTD_CONTENTSIZE_UNKNOWN; - return ZSTD_getParams_internal(compressionLevel, srcSizeHint, dictSize); -} - -} - - -// LICENSE_CHANGE_END - - -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #4 -// See the end of this file for a list - -/* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. - */ - - /*-************************************* - * Dependencies - ***************************************/ - - -namespace duckdb_zstd { -size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize) -{ - BYTE* const ostart = (BYTE* const)dst; - U32 const flSize = 1 + (srcSize>31) + (srcSize>4095); - - RETURN_ERROR_IF(srcSize + flSize > dstCapacity, dstSize_tooSmall, ""); - - switch(flSize) - { - case 1: /* 2 - 1 - 5 */ - ostart[0] = (BYTE)((U32)set_basic + (srcSize<<3)); - break; - case 2: /* 2 - 2 - 12 */ - MEM_writeLE16(ostart, (U16)((U32)set_basic + (1<<2) + (srcSize<<4))); - break; - case 3: /* 2 - 2 - 20 */ - MEM_writeLE32(ostart, (U32)((U32)set_basic + (3<<2) + (srcSize<<4))); - break; - default: /* not necessary : flSize is {1,2,3} */ - assert(0); - } - - memcpy(ostart + flSize, src, srcSize); - DEBUGLOG(5, "Raw literals: %u -> %u", (U32)srcSize, (U32)(srcSize + flSize)); - return srcSize + flSize; -} - -size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize) -{ - BYTE* const ostart = (BYTE* const)dst; - U32 const flSize = 1 + (srcSize>31) + (srcSize>4095); - - (void)dstCapacity; /* dstCapacity already guaranteed to be >=4, hence large enough */ - - switch(flSize) - { - case 1: /* 2 - 1 - 5 */ - ostart[0] = (BYTE)((U32)set_rle + (srcSize<<3)); - break; - case 2: /* 2 - 2 - 12 */ - MEM_writeLE16(ostart, (U16)((U32)set_rle + (1<<2) + (srcSize<<4))); - break; - case 3: /* 2 - 2 - 20 */ - MEM_writeLE32(ostart, (U32)((U32)set_rle + (3<<2) + (srcSize<<4))); - break; - default: /* not necessary : flSize is {1,2,3} */ - assert(0); - } - - ostart[flSize] = *(const BYTE*)src; - DEBUGLOG(5, "RLE literals: %u -> %u", (U32)srcSize, (U32)flSize + 1); - return flSize+1; -} - -size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf, - ZSTD_hufCTables_t* nextHuf, - ZSTD_strategy strategy, int disableLiteralCompression, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - void* entropyWorkspace, size_t entropyWorkspaceSize, - const int bmi2) -{ - size_t const minGain = ZSTD_minGain(srcSize, strategy); - size_t const lhSize = 3 + (srcSize >= 1 KB) + (srcSize >= 16 KB); - BYTE* const ostart = (BYTE*)dst; - U32 singleStream = srcSize < 256; - symbolEncodingType_e hType = set_compressed; - size_t cLitSize; - - DEBUGLOG(5,"ZSTD_compressLiterals (disableLiteralCompression=%i srcSize=%u)", - disableLiteralCompression, (U32)srcSize); - - /* Prepare nextEntropy assuming reusing the existing table */ - memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); - - if (disableLiteralCompression) - return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); - - /* small ? don't even attempt compression (speed opt) */ -# define COMPRESS_LITERALS_SIZE_MIN 63 - { size_t const minLitSize = (prevHuf->repeatMode == HUF_repeat_valid) ? 6 : COMPRESS_LITERALS_SIZE_MIN; - if (srcSize <= minLitSize) return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); - } - - RETURN_ERROR_IF(dstCapacity < lhSize+1, dstSize_tooSmall, "not enough space for compression"); - { HUF_repeat repeat = prevHuf->repeatMode; - int const preferRepeat = strategy < ZSTD_lazy ? srcSize <= 1024 : 0; - if (repeat == HUF_repeat_valid && lhSize == 3) singleStream = 1; - cLitSize = singleStream ? - HUF_compress1X_repeat( - ostart+lhSize, dstCapacity-lhSize, src, srcSize, - HUF_SYMBOLVALUE_MAX, HUF_TABLELOG_DEFAULT, entropyWorkspace, entropyWorkspaceSize, - (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2) : - HUF_compress4X_repeat( - ostart+lhSize, dstCapacity-lhSize, src, srcSize, - HUF_SYMBOLVALUE_MAX, HUF_TABLELOG_DEFAULT, entropyWorkspace, entropyWorkspaceSize, - (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2); - if (repeat != HUF_repeat_none) { - /* reused the existing table */ - DEBUGLOG(5, "Reusing previous huffman table"); - hType = set_repeat; - } - } - - if ((cLitSize==0) | (cLitSize >= srcSize - minGain) | ERR_isError(cLitSize)) { - memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); - return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); - } - if (cLitSize==1) { - memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); - return ZSTD_compressRleLiteralsBlock(dst, dstCapacity, src, srcSize); - } - - if (hType == set_compressed) { - /* using a newly constructed table */ - nextHuf->repeatMode = HUF_repeat_check; - } - - /* Build header */ - switch(lhSize) - { - case 3: /* 2 - 2 - 10 - 10 */ - { U32 const lhc = hType + ((!singleStream) << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<14); - MEM_writeLE24(ostart, lhc); - break; - } - case 4: /* 2 - 2 - 14 - 14 */ - { U32 const lhc = hType + (2 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<18); - MEM_writeLE32(ostart, lhc); - break; - } - case 5: /* 2 - 2 - 18 - 18 */ - { U32 const lhc = hType + (3 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<22); - MEM_writeLE32(ostart, lhc); - ostart[4] = (BYTE)(cLitSize >> 10); - break; - } - default: /* not possible : lhSize is {3,4,5} */ - assert(0); - } - DEBUGLOG(5, "Compressed literals: %u -> %u", (U32)srcSize, (U32)(lhSize+cLitSize)); - return lhSize+cLitSize; -} - -} - - -// LICENSE_CHANGE_END - - -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #4 -// See the end of this file for a list - -/* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. - */ - - /*-************************************* - * Dependencies - ***************************************/ - - -namespace duckdb_zstd { -/** - * -log2(x / 256) lookup table for x in [0, 256). - * If x == 0: Return 0 - * Else: Return floor(-log2(x / 256) * 256) - */ -static unsigned const kInverseProbabilityLog256[256] = { - 0, 2048, 1792, 1642, 1536, 1453, 1386, 1329, 1280, 1236, 1197, 1162, - 1130, 1100, 1073, 1047, 1024, 1001, 980, 960, 941, 923, 906, 889, - 874, 859, 844, 830, 817, 804, 791, 779, 768, 756, 745, 734, - 724, 714, 704, 694, 685, 676, 667, 658, 650, 642, 633, 626, - 618, 610, 603, 595, 588, 581, 574, 567, 561, 554, 548, 542, - 535, 529, 523, 517, 512, 506, 500, 495, 489, 484, 478, 473, - 468, 463, 458, 453, 448, 443, 438, 434, 429, 424, 420, 415, - 411, 407, 402, 398, 394, 390, 386, 382, 377, 373, 370, 366, - 362, 358, 354, 350, 347, 343, 339, 336, 332, 329, 325, 322, - 318, 315, 311, 308, 305, 302, 298, 295, 292, 289, 286, 282, - 279, 276, 273, 270, 267, 264, 261, 258, 256, 253, 250, 247, - 244, 241, 239, 236, 233, 230, 228, 225, 222, 220, 217, 215, - 212, 209, 207, 204, 202, 199, 197, 194, 192, 190, 187, 185, - 182, 180, 178, 175, 173, 171, 168, 166, 164, 162, 159, 157, - 155, 153, 151, 149, 146, 144, 142, 140, 138, 136, 134, 132, - 130, 128, 126, 123, 121, 119, 117, 115, 114, 112, 110, 108, - 106, 104, 102, 100, 98, 96, 94, 93, 91, 89, 87, 85, - 83, 82, 80, 78, 76, 74, 73, 71, 69, 67, 66, 64, - 62, 61, 59, 57, 55, 54, 52, 50, 49, 47, 46, 44, - 42, 41, 39, 37, 36, 34, 33, 31, 30, 28, 26, 25, - 23, 22, 20, 19, 17, 16, 14, 13, 11, 10, 8, 7, - 5, 4, 2, 1, -}; - -static unsigned ZSTD_getFSEMaxSymbolValue(FSE_CTable const* ctable) { - void const* ptr = ctable; - U16 const* u16ptr = (U16 const*)ptr; - U32 const maxSymbolValue = MEM_read16(u16ptr + 1); - return maxSymbolValue; -} - -/** - * Returns the cost in bytes of encoding the normalized count header. - * Returns an error if any of the helper functions return an error. - */ -static size_t ZSTD_NCountCost(unsigned const* count, unsigned const max, - size_t const nbSeq, unsigned const FSELog) -{ - BYTE wksp[FSE_NCOUNTBOUND]; - S16 norm[MaxSeq + 1]; - const U32 tableLog = FSE_optimalTableLog(FSELog, nbSeq, max); - FORWARD_IF_ERROR(FSE_normalizeCount(norm, tableLog, count, nbSeq, max), ""); - return FSE_writeNCount(wksp, sizeof(wksp), norm, max, tableLog); -} - -/** - * Returns the cost in bits of encoding the distribution described by count - * using the entropy bound. - */ -static size_t ZSTD_entropyCost(unsigned const* count, unsigned const max, size_t const total) -{ - unsigned cost = 0; - unsigned s; - for (s = 0; s <= max; ++s) { - unsigned norm = (unsigned)((256 * count[s]) / total); - if (count[s] != 0 && norm == 0) - norm = 1; - assert(count[s] < total); - cost += count[s] * kInverseProbabilityLog256[norm]; - } - return cost >> 8; -} - -/** - * Returns the cost in bits of encoding the distribution in count using ctable. - * Returns an error if ctable cannot represent all the symbols in count. - */ -size_t ZSTD_fseBitCost( - FSE_CTable const* ctable, - unsigned const* count, - unsigned const max) -{ - unsigned const kAccuracyLog = 8; - size_t cost = 0; - unsigned s; - FSE_CState_t cstate; - FSE_initCState(&cstate, ctable); - if (ZSTD_getFSEMaxSymbolValue(ctable) < max) { - DEBUGLOG(5, "Repeat FSE_CTable has maxSymbolValue %u < %u", - ZSTD_getFSEMaxSymbolValue(ctable), max); - return ERROR(GENERIC); - } - for (s = 0; s <= max; ++s) { - unsigned const tableLog = cstate.stateLog; - unsigned const badCost = (tableLog + 1) << kAccuracyLog; - unsigned const bitCost = FSE_bitCost(cstate.symbolTT, tableLog, s, kAccuracyLog); - if (count[s] == 0) - continue; - if (bitCost >= badCost) { - DEBUGLOG(5, "Repeat FSE_CTable has Prob[%u] == 0", s); - return ERROR(GENERIC); - } - cost += (size_t)count[s] * bitCost; - } - return cost >> kAccuracyLog; -} - -/** - * Returns the cost in bits of encoding the distribution in count using the - * table described by norm. The max symbol support by norm is assumed >= max. - * norm must be valid for every symbol with non-zero probability in count. - */ -size_t ZSTD_crossEntropyCost(short const* norm, unsigned accuracyLog, - unsigned const* count, unsigned const max) -{ - unsigned const shift = 8 - accuracyLog; - size_t cost = 0; - unsigned s; - assert(accuracyLog <= 8); - for (s = 0; s <= max; ++s) { - unsigned const normAcc = (norm[s] != -1) ? (unsigned)norm[s] : 1; - unsigned const norm256 = normAcc << shift; - assert(norm256 > 0); - assert(norm256 < 256); - cost += count[s] * kInverseProbabilityLog256[norm256]; - } - return cost >> 8; -} - -symbolEncodingType_e -ZSTD_selectEncodingType( - FSE_repeat* repeatMode, unsigned const* count, unsigned const max, - size_t const mostFrequent, size_t nbSeq, unsigned const FSELog, - FSE_CTable const* prevCTable, - short const* defaultNorm, U32 defaultNormLog, - ZSTD_defaultPolicy_e const isDefaultAllowed, - ZSTD_strategy const strategy) -{ - ZSTD_STATIC_ASSERT(ZSTD_defaultDisallowed == 0 && ZSTD_defaultAllowed != 0); - if (mostFrequent == nbSeq) { - *repeatMode = FSE_repeat_none; - if (isDefaultAllowed && nbSeq <= 2) { - /* Prefer set_basic over set_rle when there are 2 or less symbols, - * since RLE uses 1 byte, but set_basic uses 5-6 bits per symbol. - * If basic encoding isn't possible, always choose RLE. - */ - DEBUGLOG(5, "Selected set_basic"); - return set_basic; - } - DEBUGLOG(5, "Selected set_rle"); - return set_rle; - } - if (strategy < ZSTD_lazy) { - if (isDefaultAllowed) { - size_t const staticFse_nbSeq_max = 1000; - size_t const mult = 10 - strategy; - size_t const baseLog = 3; - size_t const dynamicFse_nbSeq_min = (((size_t)1 << defaultNormLog) * mult) >> baseLog; /* 28-36 for offset, 56-72 for lengths */ - assert(defaultNormLog >= 5 && defaultNormLog <= 6); /* xx_DEFAULTNORMLOG */ - assert(mult <= 9 && mult >= 7); - if ( (*repeatMode == FSE_repeat_valid) - && (nbSeq < staticFse_nbSeq_max) ) { - DEBUGLOG(5, "Selected set_repeat"); - return set_repeat; - } - if ( (nbSeq < dynamicFse_nbSeq_min) - || (mostFrequent < (nbSeq >> (defaultNormLog-1))) ) { - DEBUGLOG(5, "Selected set_basic"); - /* The format allows default tables to be repeated, but it isn't useful. - * When using simple heuristics to select encoding type, we don't want - * to confuse these tables with dictionaries. When running more careful - * analysis, we don't need to waste time checking both repeating tables - * and default tables. - */ - *repeatMode = FSE_repeat_none; - return set_basic; - } - } - } else { - size_t const basicCost = isDefaultAllowed ? ZSTD_crossEntropyCost(defaultNorm, defaultNormLog, count, max) : ERROR(GENERIC); - size_t const repeatCost = *repeatMode != FSE_repeat_none ? ZSTD_fseBitCost(prevCTable, count, max) : ERROR(GENERIC); - size_t const NCountCost = ZSTD_NCountCost(count, max, nbSeq, FSELog); - size_t const compressedCost = (NCountCost << 3) + ZSTD_entropyCost(count, max, nbSeq); - - if (isDefaultAllowed) { - assert(!ZSTD_isError(basicCost)); - assert(!(*repeatMode == FSE_repeat_valid && ZSTD_isError(repeatCost))); - } - assert(!ZSTD_isError(NCountCost)); - assert(compressedCost < ERROR(maxCode)); - DEBUGLOG(5, "Estimated bit costs: basic=%u\trepeat=%u\tcompressed=%u", - (unsigned)basicCost, (unsigned)repeatCost, (unsigned)compressedCost); - if (basicCost <= repeatCost && basicCost <= compressedCost) { - DEBUGLOG(5, "Selected set_basic"); - assert(isDefaultAllowed); - *repeatMode = FSE_repeat_none; - return set_basic; - } - if (repeatCost <= compressedCost) { - DEBUGLOG(5, "Selected set_repeat"); - assert(!ZSTD_isError(repeatCost)); - return set_repeat; - } - assert(compressedCost < basicCost && compressedCost < repeatCost); - } - DEBUGLOG(5, "Selected set_compressed"); - *repeatMode = FSE_repeat_check; - return set_compressed; -} - -size_t -ZSTD_buildCTable(void* dst, size_t dstCapacity, - FSE_CTable* nextCTable, U32 FSELog, symbolEncodingType_e type, - unsigned* count, U32 max, - const BYTE* codeTable, size_t nbSeq, - const S16* defaultNorm, U32 defaultNormLog, U32 defaultMax, - const FSE_CTable* prevCTable, size_t prevCTableSize, - void* entropyWorkspace, size_t entropyWorkspaceSize) -{ - BYTE* op = (BYTE*)dst; - const BYTE* const oend = op + dstCapacity; - DEBUGLOG(6, "ZSTD_buildCTable (dstCapacity=%u)", (unsigned)dstCapacity); - - switch (type) { - case set_rle: - FORWARD_IF_ERROR(FSE_buildCTable_rle(nextCTable, (BYTE)max), ""); - RETURN_ERROR_IF(dstCapacity==0, dstSize_tooSmall, "not enough space"); - *op = codeTable[0]; - return 1; - case set_repeat: - memcpy(nextCTable, prevCTable, prevCTableSize); - return 0; - case set_basic: - FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, defaultNorm, defaultMax, defaultNormLog, entropyWorkspace, entropyWorkspaceSize), ""); /* note : could be pre-calculated */ - return 0; - case set_compressed: { - S16 norm[MaxSeq + 1]; - size_t nbSeq_1 = nbSeq; - const U32 tableLog = FSE_optimalTableLog(FSELog, nbSeq, max); - if (count[codeTable[nbSeq-1]] > 1) { - count[codeTable[nbSeq-1]]--; - nbSeq_1--; - } - assert(nbSeq_1 > 1); - FORWARD_IF_ERROR(FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max), ""); - { size_t const NCountSize = FSE_writeNCount(op, oend - op, norm, max, tableLog); /* overflow protected */ - FORWARD_IF_ERROR(NCountSize, "FSE_writeNCount failed"); - FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, norm, max, tableLog, entropyWorkspace, entropyWorkspaceSize), ""); - return NCountSize; - } - } - default: assert(0); RETURN_ERROR(GENERIC, "impossible to reach"); - } -} - -FORCE_INLINE_TEMPLATE size_t -ZSTD_encodeSequences_body( - void* dst, size_t dstCapacity, - FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable, - FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable, - FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable, - seqDef const* sequences, size_t nbSeq, int longOffsets) -{ - BIT_CStream_t blockStream; - FSE_CState_t stateMatchLength; - FSE_CState_t stateOffsetBits; - FSE_CState_t stateLitLength; - - RETURN_ERROR_IF( - ERR_isError(BIT_initCStream(&blockStream, dst, dstCapacity)), - dstSize_tooSmall, "not enough space remaining"); - DEBUGLOG(6, "available space for bitstream : %i (dstCapacity=%u)", - (int)(blockStream.endPtr - blockStream.startPtr), - (unsigned)dstCapacity); - - /* first symbols */ - FSE_initCState2(&stateMatchLength, CTable_MatchLength, mlCodeTable[nbSeq-1]); - FSE_initCState2(&stateOffsetBits, CTable_OffsetBits, ofCodeTable[nbSeq-1]); - FSE_initCState2(&stateLitLength, CTable_LitLength, llCodeTable[nbSeq-1]); - BIT_addBits(&blockStream, sequences[nbSeq-1].litLength, ZSTDInternalConstants::LL_bits[llCodeTable[nbSeq-1]]); - if (MEM_32bits()) BIT_flushBits(&blockStream); - BIT_addBits(&blockStream, sequences[nbSeq-1].matchLength, ZSTDInternalConstants::ML_bits[mlCodeTable[nbSeq-1]]); - if (MEM_32bits()) BIT_flushBits(&blockStream); - if (longOffsets) { - U32 const ofBits = ofCodeTable[nbSeq-1]; - unsigned const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1); - if (extraBits) { - BIT_addBits(&blockStream, sequences[nbSeq-1].offset, extraBits); - BIT_flushBits(&blockStream); - } - BIT_addBits(&blockStream, sequences[nbSeq-1].offset >> extraBits, - ofBits - extraBits); - } else { - BIT_addBits(&blockStream, sequences[nbSeq-1].offset, ofCodeTable[nbSeq-1]); - } - BIT_flushBits(&blockStream); - - { size_t n; - for (n=nbSeq-2 ; n= 64-7-(LLFSELog+MLFSELog+OffFSELog))) - BIT_flushBits(&blockStream); /* (7)*/ - BIT_addBits(&blockStream, sequences[n].litLength, llBits); - if (MEM_32bits() && ((llBits+mlBits)>24)) BIT_flushBits(&blockStream); - BIT_addBits(&blockStream, sequences[n].matchLength, mlBits); - if (MEM_32bits() || (ofBits+mlBits+llBits > 56)) BIT_flushBits(&blockStream); - if (longOffsets) { - unsigned const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1); - if (extraBits) { - BIT_addBits(&blockStream, sequences[n].offset, extraBits); - BIT_flushBits(&blockStream); /* (7)*/ - } - BIT_addBits(&blockStream, sequences[n].offset >> extraBits, - ofBits - extraBits); /* 31 */ - } else { - BIT_addBits(&blockStream, sequences[n].offset, ofBits); /* 31 */ - } - BIT_flushBits(&blockStream); /* (7)*/ - DEBUGLOG(7, "remaining space : %i", (int)(blockStream.endPtr - blockStream.ptr)); - } } - - DEBUGLOG(6, "ZSTD_encodeSequences: flushing ML state with %u bits", stateMatchLength.stateLog); - FSE_flushCState(&blockStream, &stateMatchLength); - DEBUGLOG(6, "ZSTD_encodeSequences: flushing Off state with %u bits", stateOffsetBits.stateLog); - FSE_flushCState(&blockStream, &stateOffsetBits); - DEBUGLOG(6, "ZSTD_encodeSequences: flushing LL state with %u bits", stateLitLength.stateLog); - FSE_flushCState(&blockStream, &stateLitLength); - - { size_t const streamSize = BIT_closeCStream(&blockStream); - RETURN_ERROR_IF(streamSize==0, dstSize_tooSmall, "not enough space"); - return streamSize; - } -} - -static size_t -ZSTD_encodeSequences_default( - void* dst, size_t dstCapacity, - FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable, - FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable, - FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable, - seqDef const* sequences, size_t nbSeq, int longOffsets) -{ - return ZSTD_encodeSequences_body(dst, dstCapacity, - CTable_MatchLength, mlCodeTable, - CTable_OffsetBits, ofCodeTable, - CTable_LitLength, llCodeTable, - sequences, nbSeq, longOffsets); -} - - -#if DYNAMIC_BMI2 - -static TARGET_ATTRIBUTE("bmi2") size_t -ZSTD_encodeSequences_bmi2( - void* dst, size_t dstCapacity, - FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable, - FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable, - FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable, - seqDef const* sequences, size_t nbSeq, int longOffsets) -{ - return ZSTD_encodeSequences_body(dst, dstCapacity, - CTable_MatchLength, mlCodeTable, - CTable_OffsetBits, ofCodeTable, - CTable_LitLength, llCodeTable, - sequences, nbSeq, longOffsets); -} - -#endif - -size_t ZSTD_encodeSequences( - void* dst, size_t dstCapacity, - FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable, - FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable, - FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable, - seqDef const* sequences, size_t nbSeq, int longOffsets, int bmi2) -{ - DEBUGLOG(5, "ZSTD_encodeSequences: dstCapacity = %u", (unsigned)dstCapacity); -#if DYNAMIC_BMI2 - if (bmi2) { - return ZSTD_encodeSequences_bmi2(dst, dstCapacity, - CTable_MatchLength, mlCodeTable, - CTable_OffsetBits, ofCodeTable, - CTable_LitLength, llCodeTable, - sequences, nbSeq, longOffsets); - } -#endif - (void)bmi2; - return ZSTD_encodeSequences_default(dst, dstCapacity, - CTable_MatchLength, mlCodeTable, - CTable_OffsetBits, ofCodeTable, - CTable_LitLength, llCodeTable, - sequences, nbSeq, longOffsets); -} - -} - - -// LICENSE_CHANGE_END - - -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #4 -// See the end of this file for a list - -/* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. - */ - - /*-************************************* - * Dependencies - ***************************************/ - - - /* ZSTD_getSequenceLength */ - /* HIST_countFast_wksp */ - - - - -namespace duckdb_zstd { -/*-************************************* -* Superblock entropy buffer structs -***************************************/ -/** ZSTD_hufCTablesMetadata_t : - * Stores Literals Block Type for a super-block in hType, and - * huffman tree description in hufDesBuffer. - * hufDesSize refers to the size of huffman tree description in bytes. - * This metadata is populated in ZSTD_buildSuperBlockEntropy_literal() */ -typedef struct { - symbolEncodingType_e hType; - BYTE hufDesBuffer[500]; /* TODO give name to this value */ - size_t hufDesSize; -} ZSTD_hufCTablesMetadata_t; - -/** ZSTD_fseCTablesMetadata_t : - * Stores symbol compression modes for a super-block in {ll, ol, ml}Type, and - * fse tables in fseTablesBuffer. - * fseTablesSize refers to the size of fse tables in bytes. - * This metadata is populated in ZSTD_buildSuperBlockEntropy_sequences() */ -typedef struct { - symbolEncodingType_e llType; - symbolEncodingType_e ofType; - symbolEncodingType_e mlType; - BYTE fseTablesBuffer[500]; /* TODO give name to this value */ - size_t fseTablesSize; - size_t lastCountSize; /* This is to account for bug in 1.3.4. More detail in ZSTD_compressSubBlock_sequences() */ -} ZSTD_fseCTablesMetadata_t; - -typedef struct { - ZSTD_hufCTablesMetadata_t hufMetadata; - ZSTD_fseCTablesMetadata_t fseMetadata; -} ZSTD_entropyCTablesMetadata_t; - - -/** ZSTD_buildSuperBlockEntropy_literal() : - * Builds entropy for the super-block literals. - * Stores literals block type (raw, rle, compressed, repeat) and - * huffman description table to hufMetadata. - * @return : size of huffman description table or error code */ -static size_t ZSTD_buildSuperBlockEntropy_literal(void* const src, size_t srcSize, - const ZSTD_hufCTables_t* prevHuf, - ZSTD_hufCTables_t* nextHuf, - ZSTD_hufCTablesMetadata_t* hufMetadata, - const int disableLiteralsCompression, - void* workspace, size_t wkspSize) -{ - BYTE* const wkspStart = (BYTE*)workspace; - BYTE* const wkspEnd = wkspStart + wkspSize; - BYTE* const countWkspStart = wkspStart; - unsigned* const countWksp = (unsigned*)workspace; - const size_t countWkspSize = (HUF_SYMBOLVALUE_MAX + 1) * sizeof(unsigned); - BYTE* const nodeWksp = countWkspStart + countWkspSize; - const size_t nodeWkspSize = wkspEnd-nodeWksp; - unsigned maxSymbolValue = 255; - unsigned huffLog = HUF_TABLELOG_DEFAULT; - HUF_repeat repeat = prevHuf->repeatMode; - - DEBUGLOG(5, "ZSTD_buildSuperBlockEntropy_literal (srcSize=%zu)", srcSize); - - /* Prepare nextEntropy assuming reusing the existing table */ - memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); - - if (disableLiteralsCompression) { - DEBUGLOG(5, "set_basic - disabled"); - hufMetadata->hType = set_basic; - return 0; - } - - /* small ? don't even attempt compression (speed opt) */ -# define COMPRESS_LITERALS_SIZE_MIN 63 - { size_t const minLitSize = (prevHuf->repeatMode == HUF_repeat_valid) ? 6 : COMPRESS_LITERALS_SIZE_MIN; - if (srcSize <= minLitSize) { - DEBUGLOG(5, "set_basic - too small"); - hufMetadata->hType = set_basic; - return 0; - } - } - - /* Scan input and build symbol stats */ - { size_t const largest = HIST_count_wksp (countWksp, &maxSymbolValue, (const BYTE*)src, srcSize, workspace, wkspSize); - FORWARD_IF_ERROR(largest, "HIST_count_wksp failed"); - if (largest == srcSize) { - DEBUGLOG(5, "set_rle"); - hufMetadata->hType = set_rle; - return 0; - } - if (largest <= (srcSize >> 7)+4) { - DEBUGLOG(5, "set_basic - no gain"); - hufMetadata->hType = set_basic; - return 0; - } - } - - /* Validate the previous Huffman table */ - if (repeat == HUF_repeat_check && !HUF_validateCTable((HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue)) { - repeat = HUF_repeat_none; - } - - /* Build Huffman Tree */ - memset(nextHuf->CTable, 0, sizeof(nextHuf->CTable)); - huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue); - { size_t const maxBits = HUF_buildCTable_wksp((HUF_CElt*)nextHuf->CTable, countWksp, - maxSymbolValue, huffLog, - nodeWksp, nodeWkspSize); - FORWARD_IF_ERROR(maxBits, "HUF_buildCTable_wksp"); - huffLog = (U32)maxBits; - { /* Build and write the CTable */ - size_t const newCSize = HUF_estimateCompressedSize( - (HUF_CElt*)nextHuf->CTable, countWksp, maxSymbolValue); - size_t const hSize = HUF_writeCTable( - hufMetadata->hufDesBuffer, sizeof(hufMetadata->hufDesBuffer), - (HUF_CElt*)nextHuf->CTable, maxSymbolValue, huffLog); - /* Check against repeating the previous CTable */ - if (repeat != HUF_repeat_none) { - size_t const oldCSize = HUF_estimateCompressedSize( - (HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue); - if (oldCSize < srcSize && (oldCSize <= hSize + newCSize || hSize + 12 >= srcSize)) { - DEBUGLOG(5, "set_repeat - smaller"); - memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); - hufMetadata->hType = set_repeat; - return 0; - } - } - if (newCSize + hSize >= srcSize) { - DEBUGLOG(5, "set_basic - no gains"); - memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); - hufMetadata->hType = set_basic; - return 0; - } - DEBUGLOG(5, "set_compressed (hSize=%u)", (U32)hSize); - hufMetadata->hType = set_compressed; - nextHuf->repeatMode = HUF_repeat_check; - return hSize; - } - } -} - -/** ZSTD_buildSuperBlockEntropy_sequences() : - * Builds entropy for the super-block sequences. - * Stores symbol compression modes and fse table to fseMetadata. - * @return : size of fse tables or error code */ -static size_t ZSTD_buildSuperBlockEntropy_sequences(seqStore_t* seqStorePtr, - const ZSTD_fseCTables_t* prevEntropy, - ZSTD_fseCTables_t* nextEntropy, - const ZSTD_CCtx_params* cctxParams, - ZSTD_fseCTablesMetadata_t* fseMetadata, - void* workspace, size_t wkspSize) -{ - BYTE* const wkspStart = (BYTE*)workspace; - BYTE* const wkspEnd = wkspStart + wkspSize; - BYTE* const countWkspStart = wkspStart; - unsigned* const countWksp = (unsigned*)workspace; - const size_t countWkspSize = (MaxSeq + 1) * sizeof(unsigned); - BYTE* const cTableWksp = countWkspStart + countWkspSize; - const size_t cTableWkspSize = wkspEnd-cTableWksp; - ZSTD_strategy const strategy = cctxParams->cParams.strategy; - FSE_CTable* CTable_LitLength = nextEntropy->litlengthCTable; - FSE_CTable* CTable_OffsetBits = nextEntropy->offcodeCTable; - FSE_CTable* CTable_MatchLength = nextEntropy->matchlengthCTable; - const BYTE* const ofCodeTable = seqStorePtr->ofCode; - const BYTE* const llCodeTable = seqStorePtr->llCode; - const BYTE* const mlCodeTable = seqStorePtr->mlCode; - size_t const nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart; - BYTE* const ostart = fseMetadata->fseTablesBuffer; - BYTE* const oend = ostart + sizeof(fseMetadata->fseTablesBuffer); - BYTE* op = ostart; - - assert(cTableWkspSize >= (1 << MaxFSELog) * sizeof(FSE_FUNCTION_TYPE)); - DEBUGLOG(5, "ZSTD_buildSuperBlockEntropy_sequences (nbSeq=%zu)", nbSeq); - memset(workspace, 0, wkspSize); - - fseMetadata->lastCountSize = 0; - /* convert length/distances into codes */ - ZSTD_seqToCodes(seqStorePtr); - /* build CTable for Literal Lengths */ - { U32 LLtype; - unsigned max = MaxLL; - size_t const mostFrequent = HIST_countFast_wksp(countWksp, &max, llCodeTable, nbSeq, workspace, wkspSize); /* can't fail */ - DEBUGLOG(5, "Building LL table"); - nextEntropy->litlength_repeatMode = prevEntropy->litlength_repeatMode; - LLtype = ZSTD_selectEncodingType(&nextEntropy->litlength_repeatMode, - countWksp, max, mostFrequent, nbSeq, - LLFSELog, prevEntropy->litlengthCTable, - ZSTDInternalConstants::LL_defaultNorm, ZSTDInternalConstants::LL_defaultNormLog, - ZSTD_defaultAllowed, strategy); - assert(set_basic < set_compressed && set_rle < set_compressed); - assert(!(LLtype < set_compressed && nextEntropy->litlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */ - { size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_LitLength, LLFSELog, (symbolEncodingType_e)LLtype, - countWksp, max, llCodeTable, nbSeq, ZSTDInternalConstants::LL_defaultNorm, ZSTDInternalConstants::LL_defaultNormLog, MaxLL, - prevEntropy->litlengthCTable, sizeof(prevEntropy->litlengthCTable), - cTableWksp, cTableWkspSize); - FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for LitLens failed"); - if (LLtype == set_compressed) - fseMetadata->lastCountSize = countSize; - op += countSize; - fseMetadata->llType = (symbolEncodingType_e) LLtype; - } } - /* build CTable for Offsets */ - { U32 Offtype; - unsigned max = MaxOff; - size_t const mostFrequent = HIST_countFast_wksp(countWksp, &max, ofCodeTable, nbSeq, workspace, wkspSize); /* can't fail */ - /* We can only use the basic table if max <= DefaultMaxOff, otherwise the offsets are too large */ - ZSTD_defaultPolicy_e const defaultPolicy = (max <= DefaultMaxOff) ? ZSTD_defaultAllowed : ZSTD_defaultDisallowed; - DEBUGLOG(5, "Building OF table"); - nextEntropy->offcode_repeatMode = prevEntropy->offcode_repeatMode; - Offtype = ZSTD_selectEncodingType(&nextEntropy->offcode_repeatMode, - countWksp, max, mostFrequent, nbSeq, - OffFSELog, prevEntropy->offcodeCTable, - ZSTDInternalConstants::OF_defaultNorm, ZSTDInternalConstants::OF_defaultNormLog, - defaultPolicy, strategy); - assert(!(Offtype < set_compressed && nextEntropy->offcode_repeatMode != FSE_repeat_none)); /* We don't copy tables */ - { size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)Offtype, - countWksp, max, ofCodeTable, nbSeq, ZSTDInternalConstants::OF_defaultNorm, ZSTDInternalConstants::OF_defaultNormLog, DefaultMaxOff, - prevEntropy->offcodeCTable, sizeof(prevEntropy->offcodeCTable), - cTableWksp, cTableWkspSize); - FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for Offsets failed"); - if (Offtype == set_compressed) - fseMetadata->lastCountSize = countSize; - op += countSize; - fseMetadata->ofType = (symbolEncodingType_e) Offtype; - } } - /* build CTable for MatchLengths */ - { U32 MLtype; - unsigned max = MaxML; - size_t const mostFrequent = HIST_countFast_wksp(countWksp, &max, mlCodeTable, nbSeq, workspace, wkspSize); /* can't fail */ - DEBUGLOG(5, "Building ML table (remaining space : %i)", (int)(oend-op)); - nextEntropy->matchlength_repeatMode = prevEntropy->matchlength_repeatMode; - MLtype = ZSTD_selectEncodingType(&nextEntropy->matchlength_repeatMode, - countWksp, max, mostFrequent, nbSeq, - MLFSELog, prevEntropy->matchlengthCTable, - ZSTDInternalConstants::ML_defaultNorm, ZSTDInternalConstants::ML_defaultNormLog, - ZSTD_defaultAllowed, strategy); - assert(!(MLtype < set_compressed && nextEntropy->matchlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */ - { size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_MatchLength, MLFSELog, (symbolEncodingType_e)MLtype, - countWksp, max, mlCodeTable, nbSeq, ZSTDInternalConstants::ML_defaultNorm, ZSTDInternalConstants::ML_defaultNormLog, MaxML, - prevEntropy->matchlengthCTable, sizeof(prevEntropy->matchlengthCTable), - cTableWksp, cTableWkspSize); - FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for MatchLengths failed"); - if (MLtype == set_compressed) - fseMetadata->lastCountSize = countSize; - op += countSize; - fseMetadata->mlType = (symbolEncodingType_e) MLtype; - } } - assert((size_t) (op-ostart) <= sizeof(fseMetadata->fseTablesBuffer)); - return op-ostart; -} - - -/** ZSTD_buildSuperBlockEntropy() : - * Builds entropy for the super-block. - * @return : 0 on success or error code */ -static size_t -ZSTD_buildSuperBlockEntropy(seqStore_t* seqStorePtr, - const ZSTD_entropyCTables_t* prevEntropy, - ZSTD_entropyCTables_t* nextEntropy, - const ZSTD_CCtx_params* cctxParams, - ZSTD_entropyCTablesMetadata_t* entropyMetadata, - void* workspace, size_t wkspSize) -{ - size_t const litSize = seqStorePtr->lit - seqStorePtr->litStart; - DEBUGLOG(5, "ZSTD_buildSuperBlockEntropy"); - entropyMetadata->hufMetadata.hufDesSize = - ZSTD_buildSuperBlockEntropy_literal(seqStorePtr->litStart, litSize, - &prevEntropy->huf, &nextEntropy->huf, - &entropyMetadata->hufMetadata, - ZSTD_disableLiteralsCompression(cctxParams), - workspace, wkspSize); - FORWARD_IF_ERROR(entropyMetadata->hufMetadata.hufDesSize, "ZSTD_buildSuperBlockEntropy_literal failed"); - entropyMetadata->fseMetadata.fseTablesSize = - ZSTD_buildSuperBlockEntropy_sequences(seqStorePtr, - &prevEntropy->fse, &nextEntropy->fse, - cctxParams, - &entropyMetadata->fseMetadata, - workspace, wkspSize); - FORWARD_IF_ERROR(entropyMetadata->fseMetadata.fseTablesSize, "ZSTD_buildSuperBlockEntropy_sequences failed"); - return 0; -} - -/** ZSTD_compressSubBlock_literal() : - * Compresses literals section for a sub-block. - * When we have to write the Huffman table we will sometimes choose a header - * size larger than necessary. This is because we have to pick the header size - * before we know the table size + compressed size, so we have a bound on the - * table size. If we guessed incorrectly, we fall back to uncompressed literals. - * - * We write the header when writeEntropy=1 and set entropyWrriten=1 when we succeeded - * in writing the header, otherwise it is set to 0. - * - * hufMetadata->hType has literals block type info. - * If it is set_basic, all sub-blocks literals section will be Raw_Literals_Block. - * If it is set_rle, all sub-blocks literals section will be RLE_Literals_Block. - * If it is set_compressed, first sub-block's literals section will be Compressed_Literals_Block - * If it is set_compressed, first sub-block's literals section will be Treeless_Literals_Block - * and the following sub-blocks' literals sections will be Treeless_Literals_Block. - * @return : compressed size of literals section of a sub-block - * Or 0 if it unable to compress. - * Or error code */ -static size_t ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable, - const ZSTD_hufCTablesMetadata_t* hufMetadata, - const BYTE* literals, size_t litSize, - void* dst, size_t dstSize, - const int bmi2, int writeEntropy, int* entropyWritten) -{ - size_t const header = writeEntropy ? 200 : 0; - size_t const lhSize = 3 + (litSize >= (1 KB - header)) + (litSize >= (16 KB - header)); - BYTE* const ostart = (BYTE*)dst; - BYTE* const oend = ostart + dstSize; - BYTE* op = ostart + lhSize; - U32 const singleStream = lhSize == 3; - symbolEncodingType_e hType = writeEntropy ? hufMetadata->hType : set_repeat; - size_t cLitSize = 0; - - (void)bmi2; /* TODO bmi2... */ - - DEBUGLOG(5, "ZSTD_compressSubBlock_literal (litSize=%zu, lhSize=%zu, writeEntropy=%d)", litSize, lhSize, writeEntropy); - - *entropyWritten = 0; - if (litSize == 0 || hufMetadata->hType == set_basic) { - DEBUGLOG(5, "ZSTD_compressSubBlock_literal using raw literal"); - return ZSTD_noCompressLiterals(dst, dstSize, literals, litSize); - } else if (hufMetadata->hType == set_rle) { - DEBUGLOG(5, "ZSTD_compressSubBlock_literal using rle literal"); - return ZSTD_compressRleLiteralsBlock(dst, dstSize, literals, litSize); - } - - assert(litSize > 0); - assert(hufMetadata->hType == set_compressed || hufMetadata->hType == set_repeat); - - if (writeEntropy && hufMetadata->hType == set_compressed) { - memcpy(op, hufMetadata->hufDesBuffer, hufMetadata->hufDesSize); - op += hufMetadata->hufDesSize; - cLitSize += hufMetadata->hufDesSize; - DEBUGLOG(5, "ZSTD_compressSubBlock_literal (hSize=%zu)", hufMetadata->hufDesSize); - } - - /* TODO bmi2 */ - { const size_t cSize = singleStream ? HUF_compress1X_usingCTable(op, oend-op, literals, litSize, hufTable) - : HUF_compress4X_usingCTable(op, oend-op, literals, litSize, hufTable); - op += cSize; - cLitSize += cSize; - if (cSize == 0 || ERR_isError(cSize)) { - DEBUGLOG(5, "Failed to write entropy tables %s", ZSTD_getErrorName(cSize)); - return 0; - } - /* If we expand and we aren't writing a header then emit uncompressed */ - if (!writeEntropy && cLitSize >= litSize) { - DEBUGLOG(5, "ZSTD_compressSubBlock_literal using raw literal because uncompressible"); - return ZSTD_noCompressLiterals(dst, dstSize, literals, litSize); - } - /* If we are writing headers then allow expansion that doesn't change our header size. */ - if (lhSize < (size_t)(3 + (cLitSize >= 1 KB) + (cLitSize >= 16 KB))) { - assert(cLitSize > litSize); - DEBUGLOG(5, "Literals expanded beyond allowed header size"); - return ZSTD_noCompressLiterals(dst, dstSize, literals, litSize); - } - DEBUGLOG(5, "ZSTD_compressSubBlock_literal (cSize=%zu)", cSize); - } - - /* Build header */ - switch(lhSize) - { - case 3: /* 2 - 2 - 10 - 10 */ - { U32 const lhc = hType + ((!singleStream) << 2) + ((U32)litSize<<4) + ((U32)cLitSize<<14); - MEM_writeLE24(ostart, lhc); - break; - } - case 4: /* 2 - 2 - 14 - 14 */ - { U32 const lhc = hType + (2 << 2) + ((U32)litSize<<4) + ((U32)cLitSize<<18); - MEM_writeLE32(ostart, lhc); - break; - } - case 5: /* 2 - 2 - 18 - 18 */ - { U32 const lhc = hType + (3 << 2) + ((U32)litSize<<4) + ((U32)cLitSize<<22); - MEM_writeLE32(ostart, lhc); - ostart[4] = (BYTE)(cLitSize >> 10); - break; - } - default: /* not possible : lhSize is {3,4,5} */ - assert(0); - } - *entropyWritten = 1; - DEBUGLOG(5, "Compressed literals: %u -> %u", (U32)litSize, (U32)(op-ostart)); - return op-ostart; -} - -static size_t ZSTD_seqDecompressedSize(seqStore_t const* seqStore, const seqDef* sequences, size_t nbSeq, size_t litSize, int lastSequence) { - const seqDef* const sstart = sequences; - const seqDef* const send = sequences + nbSeq; - const seqDef* sp = sstart; - size_t matchLengthSum = 0; - while (send-sp > 0) { - ZSTD_sequenceLength const seqLen = ZSTD_getSequenceLength(seqStore, sp); - matchLengthSum += seqLen.matchLength; - sp++; - } - return matchLengthSum + litSize; -} - -/** ZSTD_compressSubBlock_sequences() : - * Compresses sequences section for a sub-block. - * fseMetadata->llType, fseMetadata->ofType, and fseMetadata->mlType have - * symbol compression modes for the super-block. - * The first successfully compressed block will have these in its header. - * We set entropyWritten=1 when we succeed in compressing the sequences. - * The following sub-blocks will always have repeat mode. - * @return : compressed size of sequences section of a sub-block - * Or 0 if it is unable to compress - * Or error code. */ -static size_t ZSTD_compressSubBlock_sequences(const ZSTD_fseCTables_t* fseTables, - const ZSTD_fseCTablesMetadata_t* fseMetadata, - const seqDef* sequences, size_t nbSeq, - const BYTE* llCode, const BYTE* mlCode, const BYTE* ofCode, - const ZSTD_CCtx_params* cctxParams, - void* dst, size_t dstCapacity, - const int bmi2, int writeEntropy, int* entropyWritten) -{ - const int longOffsets = cctxParams->cParams.windowLog > STREAM_ACCUMULATOR_MIN; - BYTE* const ostart = (BYTE*)dst; - BYTE* const oend = ostart + dstCapacity; - BYTE* op = ostart; - BYTE* seqHead; - - DEBUGLOG(5, "ZSTD_compressSubBlock_sequences (nbSeq=%zu, writeEntropy=%d, longOffsets=%d)", nbSeq, writeEntropy, longOffsets); - - *entropyWritten = 0; - /* Sequences Header */ - RETURN_ERROR_IF((oend-op) < 3 /*max nbSeq Size*/ + 1 /*seqHead*/, - dstSize_tooSmall, ""); - if (nbSeq < 0x7F) - *op++ = (BYTE)nbSeq; - else if (nbSeq < LONGNBSEQ) - op[0] = (BYTE)((nbSeq>>8) + 0x80), op[1] = (BYTE)nbSeq, op+=2; - else - op[0]=0xFF, MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ)), op+=3; - if (nbSeq==0) { - return op - ostart; - } - - /* seqHead : flags for FSE encoding type */ - seqHead = op++; - - DEBUGLOG(5, "ZSTD_compressSubBlock_sequences (seqHeadSize=%u)", (unsigned)(op-ostart)); - - if (writeEntropy) { - const U32 LLtype = fseMetadata->llType; - const U32 Offtype = fseMetadata->ofType; - const U32 MLtype = fseMetadata->mlType; - DEBUGLOG(5, "ZSTD_compressSubBlock_sequences (fseTablesSize=%zu)", fseMetadata->fseTablesSize); - *seqHead = (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2)); - memcpy(op, fseMetadata->fseTablesBuffer, fseMetadata->fseTablesSize); - op += fseMetadata->fseTablesSize; - } else { - const U32 repeat = set_repeat; - *seqHead = (BYTE)((repeat<<6) + (repeat<<4) + (repeat<<2)); - } - - { size_t const bitstreamSize = ZSTD_encodeSequences( - op, oend - op, - fseTables->matchlengthCTable, mlCode, - fseTables->offcodeCTable, ofCode, - fseTables->litlengthCTable, llCode, - sequences, nbSeq, - longOffsets, bmi2); - FORWARD_IF_ERROR(bitstreamSize, "ZSTD_encodeSequences failed"); - op += bitstreamSize; - /* zstd versions <= 1.3.4 mistakenly report corruption when - * FSE_readNCount() receives a buffer < 4 bytes. - * Fixed by https://github.com/facebook/zstd/pull/1146. - * This can happen when the last set_compressed table present is 2 - * bytes and the bitstream is only one byte. - * In this exceedingly rare case, we will simply emit an uncompressed - * block, since it isn't worth optimizing. - */ -#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION - if (writeEntropy && fseMetadata->lastCountSize && fseMetadata->lastCountSize + bitstreamSize < 4) { - /* NCountSize >= 2 && bitstreamSize > 0 ==> lastCountSize == 3 */ - assert(fseMetadata->lastCountSize + bitstreamSize == 3); - DEBUGLOG(5, "Avoiding bug in zstd decoder in versions <= 1.3.4 by " - "emitting an uncompressed block."); - return 0; - } -#endif - DEBUGLOG(5, "ZSTD_compressSubBlock_sequences (bitstreamSize=%zu)", bitstreamSize); - } - - /* zstd versions <= 1.4.0 mistakenly report error when - * sequences section body size is less than 3 bytes. - * Fixed by https://github.com/facebook/zstd/pull/1664. - * This can happen when the previous sequences section block is compressed - * with rle mode and the current block's sequences section is compressed - * with repeat mode where sequences section body size can be 1 byte. - */ -#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION - if (op-seqHead < 4) { - DEBUGLOG(5, "Avoiding bug in zstd decoder in versions <= 1.4.0 by emitting " - "an uncompressed block when sequences are < 4 bytes"); - return 0; - } -#endif - - *entropyWritten = 1; - return op - ostart; -} - -/** ZSTD_compressSubBlock() : - * Compresses a single sub-block. - * @return : compressed size of the sub-block - * Or 0 if it failed to compress. */ -static size_t ZSTD_compressSubBlock(const ZSTD_entropyCTables_t* entropy, - const ZSTD_entropyCTablesMetadata_t* entropyMetadata, - const seqDef* sequences, size_t nbSeq, - const BYTE* literals, size_t litSize, - const BYTE* llCode, const BYTE* mlCode, const BYTE* ofCode, - const ZSTD_CCtx_params* cctxParams, - void* dst, size_t dstCapacity, - const int bmi2, - int writeLitEntropy, int writeSeqEntropy, - int* litEntropyWritten, int* seqEntropyWritten, - U32 lastBlock) -{ - BYTE* const ostart = (BYTE*)dst; - BYTE* const oend = ostart + dstCapacity; - BYTE* op = ostart + ZSTDInternalConstants::ZSTD_blockHeaderSize; - DEBUGLOG(5, "ZSTD_compressSubBlock (litSize=%zu, nbSeq=%zu, writeLitEntropy=%d, writeSeqEntropy=%d, lastBlock=%d)", - litSize, nbSeq, writeLitEntropy, writeSeqEntropy, lastBlock); - { size_t cLitSize = ZSTD_compressSubBlock_literal((const HUF_CElt*)entropy->huf.CTable, - &entropyMetadata->hufMetadata, literals, litSize, - op, oend-op, bmi2, writeLitEntropy, litEntropyWritten); - FORWARD_IF_ERROR(cLitSize, "ZSTD_compressSubBlock_literal failed"); - if (cLitSize == 0) return 0; - op += cLitSize; - } - { size_t cSeqSize = ZSTD_compressSubBlock_sequences(&entropy->fse, - &entropyMetadata->fseMetadata, - sequences, nbSeq, - llCode, mlCode, ofCode, - cctxParams, - op, oend-op, - bmi2, writeSeqEntropy, seqEntropyWritten); - FORWARD_IF_ERROR(cSeqSize, "ZSTD_compressSubBlock_sequences failed"); - if (cSeqSize == 0) return 0; - op += cSeqSize; - } - /* Write block header */ - { size_t cSize = (op-ostart)-ZSTDInternalConstants::ZSTD_blockHeaderSize; - U32 const cBlockHeader24 = lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3); - MEM_writeLE24(ostart, cBlockHeader24); - } - return op-ostart; -} - -static size_t ZSTD_estimateSubBlockSize_literal(const BYTE* literals, size_t litSize, - const ZSTD_hufCTables_t* huf, - const ZSTD_hufCTablesMetadata_t* hufMetadata, - void* workspace, size_t wkspSize, - int writeEntropy) -{ - unsigned* const countWksp = (unsigned*)workspace; - unsigned maxSymbolValue = 255; - size_t literalSectionHeaderSize = 3; /* Use hard coded size of 3 bytes */ - - if (hufMetadata->hType == set_basic) return litSize; - else if (hufMetadata->hType == set_rle) return 1; - else if (hufMetadata->hType == set_compressed || hufMetadata->hType == set_repeat) { - size_t const largest = HIST_count_wksp (countWksp, &maxSymbolValue, (const BYTE*)literals, litSize, workspace, wkspSize); - if (ZSTD_isError(largest)) return litSize; - { size_t cLitSizeEstimate = HUF_estimateCompressedSize((const HUF_CElt*)huf->CTable, countWksp, maxSymbolValue); - if (writeEntropy) cLitSizeEstimate += hufMetadata->hufDesSize; - return cLitSizeEstimate + literalSectionHeaderSize; - } } - assert(0); /* impossible */ - return 0; -} - -static size_t ZSTD_estimateSubBlockSize_symbolType(symbolEncodingType_e type, - const BYTE* codeTable, unsigned maxCode, - size_t nbSeq, const FSE_CTable* fseCTable, - const U32* additionalBits, - short const* defaultNorm, U32 defaultNormLog, - void* workspace, size_t wkspSize) -{ - unsigned* const countWksp = (unsigned*)workspace; - const BYTE* ctp = codeTable; - const BYTE* const ctStart = ctp; - const BYTE* const ctEnd = ctStart + nbSeq; - size_t cSymbolTypeSizeEstimateInBits = 0; - unsigned max = maxCode; - - HIST_countFast_wksp(countWksp, &max, codeTable, nbSeq, workspace, wkspSize); /* can't fail */ - if (type == set_basic) { - cSymbolTypeSizeEstimateInBits = ZSTD_crossEntropyCost(defaultNorm, defaultNormLog, countWksp, max); - } else if (type == set_rle) { - cSymbolTypeSizeEstimateInBits = 0; - } else if (type == set_compressed || type == set_repeat) { - cSymbolTypeSizeEstimateInBits = ZSTD_fseBitCost(fseCTable, countWksp, max); - } - if (ZSTD_isError(cSymbolTypeSizeEstimateInBits)) return nbSeq * 10; - while (ctp < ctEnd) { - if (additionalBits) cSymbolTypeSizeEstimateInBits += additionalBits[*ctp]; - else cSymbolTypeSizeEstimateInBits += *ctp; /* for offset, offset code is also the number of additional bits */ - ctp++; - } - return cSymbolTypeSizeEstimateInBits / 8; -} - -static size_t ZSTD_estimateSubBlockSize_sequences(const BYTE* ofCodeTable, - const BYTE* llCodeTable, - const BYTE* mlCodeTable, - size_t nbSeq, - const ZSTD_fseCTables_t* fseTables, - const ZSTD_fseCTablesMetadata_t* fseMetadata, - void* workspace, size_t wkspSize, - int writeEntropy) -{ - size_t sequencesSectionHeaderSize = 3; /* Use hard coded size of 3 bytes */ - size_t cSeqSizeEstimate = 0; - cSeqSizeEstimate += ZSTD_estimateSubBlockSize_symbolType(fseMetadata->ofType, ofCodeTable, MaxOff, - nbSeq, fseTables->offcodeCTable, NULL, - ZSTDInternalConstants::OF_defaultNorm, ZSTDInternalConstants::OF_defaultNormLog, - workspace, wkspSize); - cSeqSizeEstimate += ZSTD_estimateSubBlockSize_symbolType(fseMetadata->llType, llCodeTable, MaxLL, - nbSeq, fseTables->litlengthCTable, ZSTDInternalConstants::LL_bits, - ZSTDInternalConstants::LL_defaultNorm, ZSTDInternalConstants::LL_defaultNormLog, - workspace, wkspSize); - cSeqSizeEstimate += ZSTD_estimateSubBlockSize_symbolType(fseMetadata->mlType, mlCodeTable, MaxML, - nbSeq, fseTables->matchlengthCTable, ZSTDInternalConstants::ML_bits, - ZSTDInternalConstants::ML_defaultNorm, ZSTDInternalConstants::ML_defaultNormLog, - workspace, wkspSize); - if (writeEntropy) cSeqSizeEstimate += fseMetadata->fseTablesSize; - return cSeqSizeEstimate + sequencesSectionHeaderSize; -} - -static size_t ZSTD_estimateSubBlockSize(const BYTE* literals, size_t litSize, - const BYTE* ofCodeTable, - const BYTE* llCodeTable, - const BYTE* mlCodeTable, - size_t nbSeq, - const ZSTD_entropyCTables_t* entropy, - const ZSTD_entropyCTablesMetadata_t* entropyMetadata, - void* workspace, size_t wkspSize, - int writeLitEntropy, int writeSeqEntropy) { - size_t cSizeEstimate = 0; - cSizeEstimate += ZSTD_estimateSubBlockSize_literal(literals, litSize, - &entropy->huf, &entropyMetadata->hufMetadata, - workspace, wkspSize, writeLitEntropy); - cSizeEstimate += ZSTD_estimateSubBlockSize_sequences(ofCodeTable, llCodeTable, mlCodeTable, - nbSeq, &entropy->fse, &entropyMetadata->fseMetadata, - workspace, wkspSize, writeSeqEntropy); - return cSizeEstimate + ZSTDInternalConstants::ZSTD_blockHeaderSize; -} - -static int ZSTD_needSequenceEntropyTables(ZSTD_fseCTablesMetadata_t const* fseMetadata) -{ - if (fseMetadata->llType == set_compressed || fseMetadata->llType == set_rle) - return 1; - if (fseMetadata->mlType == set_compressed || fseMetadata->mlType == set_rle) - return 1; - if (fseMetadata->ofType == set_compressed || fseMetadata->ofType == set_rle) - return 1; - return 0; -} - -/** ZSTD_compressSubBlock_multi() : - * Breaks super-block into multiple sub-blocks and compresses them. - * Entropy will be written to the first block. - * The following blocks will use repeat mode to compress. - * All sub-blocks are compressed blocks (no raw or rle blocks). - * @return : compressed size of the super block (which is multiple ZSTD blocks) - * Or 0 if it failed to compress. */ -static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr, - const ZSTD_compressedBlockState_t* prevCBlock, - ZSTD_compressedBlockState_t* nextCBlock, - const ZSTD_entropyCTablesMetadata_t* entropyMetadata, - const ZSTD_CCtx_params* cctxParams, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - const int bmi2, U32 lastBlock, - void* workspace, size_t wkspSize) -{ - const seqDef* const sstart = seqStorePtr->sequencesStart; - const seqDef* const send = seqStorePtr->sequences; - const seqDef* sp = sstart; - const BYTE* const lstart = seqStorePtr->litStart; - const BYTE* const lend = seqStorePtr->lit; - const BYTE* lp = lstart; - BYTE const* ip = (BYTE const*)src; - BYTE const* const iend = ip + srcSize; - BYTE* const ostart = (BYTE*)dst; - BYTE* const oend = ostart + dstCapacity; - BYTE* op = ostart; - const BYTE* llCodePtr = seqStorePtr->llCode; - const BYTE* mlCodePtr = seqStorePtr->mlCode; - const BYTE* ofCodePtr = seqStorePtr->ofCode; - size_t targetCBlockSize = cctxParams->targetCBlockSize; - size_t litSize, seqCount; - int writeLitEntropy = entropyMetadata->hufMetadata.hType == set_compressed; - int writeSeqEntropy = 1; - int lastSequence = 0; - - DEBUGLOG(5, "ZSTD_compressSubBlock_multi (litSize=%u, nbSeq=%u)", - (unsigned)(lend-lp), (unsigned)(send-sstart)); - - litSize = 0; - seqCount = 0; - do { - size_t cBlockSizeEstimate = 0; - if (sstart == send) { - lastSequence = 1; - } else { - const seqDef* const sequence = sp + seqCount; - lastSequence = sequence == send - 1; - litSize += ZSTD_getSequenceLength(seqStorePtr, sequence).litLength; - seqCount++; - } - if (lastSequence) { - assert(lp <= lend); - assert(litSize <= (size_t)(lend - lp)); - litSize = (size_t)(lend - lp); - } - /* I think there is an optimization opportunity here. - * Calling ZSTD_estimateSubBlockSize for every sequence can be wasteful - * since it recalculates estimate from scratch. - * For example, it would recount literal distribution and symbol codes everytime. - */ - cBlockSizeEstimate = ZSTD_estimateSubBlockSize(lp, litSize, ofCodePtr, llCodePtr, mlCodePtr, seqCount, - &nextCBlock->entropy, entropyMetadata, - workspace, wkspSize, writeLitEntropy, writeSeqEntropy); - if (cBlockSizeEstimate > targetCBlockSize || lastSequence) { - int litEntropyWritten = 0; - int seqEntropyWritten = 0; - const size_t decompressedSize = ZSTD_seqDecompressedSize(seqStorePtr, sp, seqCount, litSize, lastSequence); - const size_t cSize = ZSTD_compressSubBlock(&nextCBlock->entropy, entropyMetadata, - sp, seqCount, - lp, litSize, - llCodePtr, mlCodePtr, ofCodePtr, - cctxParams, - op, oend-op, - bmi2, writeLitEntropy, writeSeqEntropy, - &litEntropyWritten, &seqEntropyWritten, - lastBlock && lastSequence); - FORWARD_IF_ERROR(cSize, "ZSTD_compressSubBlock failed"); - if (cSize > 0 && cSize < decompressedSize) { - DEBUGLOG(5, "Committed the sub-block"); - assert(ip + decompressedSize <= iend); - ip += decompressedSize; - sp += seqCount; - lp += litSize; - op += cSize; - llCodePtr += seqCount; - mlCodePtr += seqCount; - ofCodePtr += seqCount; - litSize = 0; - seqCount = 0; - /* Entropy only needs to be written once */ - if (litEntropyWritten) { - writeLitEntropy = 0; - } - if (seqEntropyWritten) { - writeSeqEntropy = 0; - } - } - } - } while (!lastSequence); - if (writeLitEntropy) { - DEBUGLOG(5, "ZSTD_compressSubBlock_multi has literal entropy tables unwritten"); - memcpy(&nextCBlock->entropy.huf, &prevCBlock->entropy.huf, sizeof(prevCBlock->entropy.huf)); - } - if (writeSeqEntropy && ZSTD_needSequenceEntropyTables(&entropyMetadata->fseMetadata)) { - /* If we haven't written our entropy tables, then we've violated our contract and - * must emit an uncompressed block. - */ - DEBUGLOG(5, "ZSTD_compressSubBlock_multi has sequence entropy tables unwritten"); - return 0; - } - if (ip < iend) { - size_t const cSize = ZSTD_noCompressBlock(op, oend - op, ip, iend - ip, lastBlock); - DEBUGLOG(5, "ZSTD_compressSubBlock_multi last sub-block uncompressed, %zu bytes", (size_t)(iend - ip)); - FORWARD_IF_ERROR(cSize, "ZSTD_noCompressBlock failed"); - assert(cSize != 0); - op += cSize; - /* We have to regenerate the repcodes because we've skipped some sequences */ - if (sp < send) { - seqDef const* seq; - repcodes_t rep; - memcpy(&rep, prevCBlock->rep, sizeof(rep)); - for (seq = sstart; seq < sp; ++seq) { - rep = ZSTD_updateRep(rep.rep, seq->offset - 1, ZSTD_getSequenceLength(seqStorePtr, seq).litLength == 0); - } - memcpy(nextCBlock->rep, &rep, sizeof(rep)); - } - } - DEBUGLOG(5, "ZSTD_compressSubBlock_multi compressed"); - return op-ostart; -} - -size_t ZSTD_compressSuperBlock(ZSTD_CCtx* zc, - void* dst, size_t dstCapacity, - void const* src, size_t srcSize, - unsigned lastBlock) { - ZSTD_entropyCTablesMetadata_t entropyMetadata; - - FORWARD_IF_ERROR(ZSTD_buildSuperBlockEntropy(&zc->seqStore, - &zc->blockState.prevCBlock->entropy, - &zc->blockState.nextCBlock->entropy, - &zc->appliedParams, - &entropyMetadata, - zc->entropyWorkspace, HUF_WORKSPACE_SIZE /* statically allocated in resetCCtx */), ""); - - return ZSTD_compressSubBlock_multi(&zc->seqStore, - zc->blockState.prevCBlock, - zc->blockState.nextCBlock, - &entropyMetadata, - &zc->appliedParams, - dst, dstCapacity, - src, srcSize, - zc->bmi2, lastBlock, - zc->entropyWorkspace, HUF_WORKSPACE_SIZE /* statically allocated in resetCCtx */); -} - -} - - -// LICENSE_CHANGE_END - - -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #4 -// See the end of this file for a list - -/* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. - */ - - - - -namespace duckdb_zstd { - -void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms, - void const* end, ZSTD_dictTableLoadMethod_e dtlm) -{ - const ZSTD_compressionParameters* const cParams = &ms->cParams; - U32* const hashLarge = ms->hashTable; - U32 const hBitsL = cParams->hashLog; - U32 const mls = cParams->minMatch; - U32* const hashSmall = ms->chainTable; - U32 const hBitsS = cParams->chainLog; - const BYTE* const base = ms->window.base; - const BYTE* ip = base + ms->nextToUpdate; - const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE; - const U32 fastHashFillStep = 3; - - /* Always insert every fastHashFillStep position into the hash tables. - * Insert the other positions into the large hash table if their entry - * is empty. - */ - for (; ip + fastHashFillStep - 1 <= iend; ip += fastHashFillStep) { - U32 const current = (U32)(ip - base); - U32 i; - for (i = 0; i < fastHashFillStep; ++i) { - size_t const smHash = ZSTD_hashPtr(ip + i, hBitsS, mls); - size_t const lgHash = ZSTD_hashPtr(ip + i, hBitsL, 8); - if (i == 0) - hashSmall[smHash] = current + i; - if (i == 0 || hashLarge[lgHash] == 0) - hashLarge[lgHash] = current + i; - /* Only load extra positions for ZSTD_dtlm_full */ - if (dtlm == ZSTD_dtlm_fast) - break; - } } -} - - -FORCE_INLINE_TEMPLATE -size_t ZSTD_compressBlock_doubleFast_generic( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize, - U32 const mls /* template */, ZSTD_dictMode_e const dictMode) -{ - ZSTD_compressionParameters const* cParams = &ms->cParams; - U32* const hashLong = ms->hashTable; - const U32 hBitsL = cParams->hashLog; - U32* const hashSmall = ms->chainTable; - const U32 hBitsS = cParams->chainLog; - const BYTE* const base = ms->window.base; - const BYTE* const istart = (const BYTE*)src; - const BYTE* ip = istart; - const BYTE* anchor = istart; - const U32 endIndex = (U32)((size_t)(istart - base) + srcSize); - /* presumes that, if there is a dictionary, it must be using Attach mode */ - const U32 prefixLowestIndex = ZSTD_getLowestPrefixIndex(ms, endIndex, cParams->windowLog); - const BYTE* const prefixLowest = base + prefixLowestIndex; - const BYTE* const iend = istart + srcSize; - const BYTE* const ilimit = iend - HASH_READ_SIZE; - U32 offset_1=rep[0], offset_2=rep[1]; - U32 offsetSaved = 0; - - const ZSTD_matchState_t* const dms = ms->dictMatchState; - const ZSTD_compressionParameters* const dictCParams = - dictMode == ZSTD_dictMatchState ? - &dms->cParams : NULL; - const U32* const dictHashLong = dictMode == ZSTD_dictMatchState ? - dms->hashTable : NULL; - const U32* const dictHashSmall = dictMode == ZSTD_dictMatchState ? - dms->chainTable : NULL; - const U32 dictStartIndex = dictMode == ZSTD_dictMatchState ? - dms->window.dictLimit : 0; - const BYTE* const dictBase = dictMode == ZSTD_dictMatchState ? - dms->window.base : NULL; - const BYTE* const dictStart = dictMode == ZSTD_dictMatchState ? - dictBase + dictStartIndex : NULL; - const BYTE* const dictEnd = dictMode == ZSTD_dictMatchState ? - dms->window.nextSrc : NULL; - const U32 dictIndexDelta = dictMode == ZSTD_dictMatchState ? - prefixLowestIndex - (U32)(dictEnd - dictBase) : - 0; - const U32 dictHBitsL = dictMode == ZSTD_dictMatchState ? - dictCParams->hashLog : hBitsL; - const U32 dictHBitsS = dictMode == ZSTD_dictMatchState ? - dictCParams->chainLog : hBitsS; - const U32 dictAndPrefixLength = (U32)((ip - prefixLowest) + (dictEnd - dictStart)); - - DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_generic"); - - assert(dictMode == ZSTD_noDict || dictMode == ZSTD_dictMatchState); - - /* if a dictionary is attached, it must be within window range */ - if (dictMode == ZSTD_dictMatchState) { - assert(ms->window.dictLimit + (1U << cParams->windowLog) >= endIndex); - } - - /* init */ - ip += (dictAndPrefixLength == 0); - if (dictMode == ZSTD_noDict) { - U32 const current = (U32)(ip - base); - U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, current, cParams->windowLog); - U32 const maxRep = current - windowLow; - if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0; - if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0; - } - if (dictMode == ZSTD_dictMatchState) { - /* dictMatchState repCode checks don't currently handle repCode == 0 - * disabling. */ - assert(offset_1 <= dictAndPrefixLength); - assert(offset_2 <= dictAndPrefixLength); - } - - /* Main Search Loop */ - while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */ - size_t mLength; - U32 offset; - size_t const h2 = ZSTD_hashPtr(ip, hBitsL, 8); - size_t const h = ZSTD_hashPtr(ip, hBitsS, mls); - size_t const dictHL = ZSTD_hashPtr(ip, dictHBitsL, 8); - size_t const dictHS = ZSTD_hashPtr(ip, dictHBitsS, mls); - U32 const current = (U32)(ip-base); - U32 const matchIndexL = hashLong[h2]; - U32 matchIndexS = hashSmall[h]; - const BYTE* matchLong = base + matchIndexL; - const BYTE* match = base + matchIndexS; - const U32 repIndex = current + 1 - offset_1; - const BYTE* repMatch = (dictMode == ZSTD_dictMatchState - && repIndex < prefixLowestIndex) ? - dictBase + (repIndex - dictIndexDelta) : - base + repIndex; - hashLong[h2] = hashSmall[h] = current; /* update hash tables */ - - /* check dictMatchState repcode */ - if (dictMode == ZSTD_dictMatchState - && ((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */) - && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { - const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend; - mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4; - ip++; - ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH); - goto _match_stored; - } - - /* check noDict repcode */ - if ( dictMode == ZSTD_noDict - && ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)))) { - mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4; - ip++; - ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH); - goto _match_stored; - } - - if (matchIndexL > prefixLowestIndex) { - /* check prefix long match */ - if (MEM_read64(matchLong) == MEM_read64(ip)) { - mLength = ZSTD_count(ip+8, matchLong+8, iend) + 8; - offset = (U32)(ip-matchLong); - while (((ip>anchor) & (matchLong>prefixLowest)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */ - goto _match_found; - } - } else if (dictMode == ZSTD_dictMatchState) { - /* check dictMatchState long match */ - U32 const dictMatchIndexL = dictHashLong[dictHL]; - const BYTE* dictMatchL = dictBase + dictMatchIndexL; - assert(dictMatchL < dictEnd); - - if (dictMatchL > dictStart && MEM_read64(dictMatchL) == MEM_read64(ip)) { - mLength = ZSTD_count_2segments(ip+8, dictMatchL+8, iend, dictEnd, prefixLowest) + 8; - offset = (U32)(current - dictMatchIndexL - dictIndexDelta); - while (((ip>anchor) & (dictMatchL>dictStart)) && (ip[-1] == dictMatchL[-1])) { ip--; dictMatchL--; mLength++; } /* catch up */ - goto _match_found; - } } - - if (matchIndexS > prefixLowestIndex) { - /* check prefix short match */ - if (MEM_read32(match) == MEM_read32(ip)) { - goto _search_next_long; - } - } else if (dictMode == ZSTD_dictMatchState) { - /* check dictMatchState short match */ - U32 const dictMatchIndexS = dictHashSmall[dictHS]; - match = dictBase + dictMatchIndexS; - matchIndexS = dictMatchIndexS + dictIndexDelta; - - if (match > dictStart && MEM_read32(match) == MEM_read32(ip)) { - goto _search_next_long; - } } - - ip += ((ip-anchor) >> kSearchStrength) + 1; -#if defined(__aarch64__) - PREFETCH_L1(ip+256); -#endif - continue; - -_search_next_long: - - { size_t const hl3 = ZSTD_hashPtr(ip+1, hBitsL, 8); - size_t const dictHLNext = ZSTD_hashPtr(ip+1, dictHBitsL, 8); - U32 const matchIndexL3 = hashLong[hl3]; - const BYTE* matchL3 = base + matchIndexL3; - hashLong[hl3] = current + 1; - - /* check prefix long +1 match */ - if (matchIndexL3 > prefixLowestIndex) { - if (MEM_read64(matchL3) == MEM_read64(ip+1)) { - mLength = ZSTD_count(ip+9, matchL3+8, iend) + 8; - ip++; - offset = (U32)(ip-matchL3); - while (((ip>anchor) & (matchL3>prefixLowest)) && (ip[-1] == matchL3[-1])) { ip--; matchL3--; mLength++; } /* catch up */ - goto _match_found; - } - } else if (dictMode == ZSTD_dictMatchState) { - /* check dict long +1 match */ - U32 const dictMatchIndexL3 = dictHashLong[dictHLNext]; - const BYTE* dictMatchL3 = dictBase + dictMatchIndexL3; - assert(dictMatchL3 < dictEnd); - if (dictMatchL3 > dictStart && MEM_read64(dictMatchL3) == MEM_read64(ip+1)) { - mLength = ZSTD_count_2segments(ip+1+8, dictMatchL3+8, iend, dictEnd, prefixLowest) + 8; - ip++; - offset = (U32)(current + 1 - dictMatchIndexL3 - dictIndexDelta); - while (((ip>anchor) & (dictMatchL3>dictStart)) && (ip[-1] == dictMatchL3[-1])) { ip--; dictMatchL3--; mLength++; } /* catch up */ - goto _match_found; - } } } - - /* if no long +1 match, explore the short match we found */ - if (dictMode == ZSTD_dictMatchState && matchIndexS < prefixLowestIndex) { - mLength = ZSTD_count_2segments(ip+4, match+4, iend, dictEnd, prefixLowest) + 4; - offset = (U32)(current - matchIndexS); - while (((ip>anchor) & (match>dictStart)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ - } else { - mLength = ZSTD_count(ip+4, match+4, iend) + 4; - offset = (U32)(ip - match); - while (((ip>anchor) & (match>prefixLowest)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ - } - - /* fall-through */ - -_match_found: - offset_2 = offset_1; - offset_1 = offset; - - ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH); - -_match_stored: - /* match found */ - ip += mLength; - anchor = ip; - - if (ip <= ilimit) { - /* Complementary insertion */ - /* done after iLimit test, as candidates could be > iend-8 */ - { U32 const indexToInsert = current+2; - hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert; - hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base); - hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert; - hashSmall[ZSTD_hashPtr(ip-1, hBitsS, mls)] = (U32)(ip-1-base); - } - - /* check immediate repcode */ - if (dictMode == ZSTD_dictMatchState) { - while (ip <= ilimit) { - U32 const current2 = (U32)(ip-base); - U32 const repIndex2 = current2 - offset_2; - const BYTE* repMatch2 = dictMode == ZSTD_dictMatchState - && repIndex2 < prefixLowestIndex ? - dictBase + repIndex2 - dictIndexDelta : - base + repIndex2; - if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */) - && (MEM_read32(repMatch2) == MEM_read32(ip)) ) { - const BYTE* const repEnd2 = repIndex2 < prefixLowestIndex ? dictEnd : iend; - size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixLowest) + 4; - U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */ - ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH); - hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2; - hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2; - ip += repLength2; - anchor = ip; - continue; - } - break; - } } - - if (dictMode == ZSTD_noDict) { - while ( (ip <= ilimit) - && ( (offset_2>0) - & (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) { - /* store sequence */ - size_t const rLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4; - U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; /* swap offset_2 <=> offset_1 */ - hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip-base); - hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip-base); - ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, rLength-MINMATCH); - ip += rLength; - anchor = ip; - continue; /* faster when present ... (?) */ - } } } - } /* while (ip < ilimit) */ - - /* save reps for next block */ - rep[0] = offset_1 ? offset_1 : offsetSaved; - rep[1] = offset_2 ? offset_2 : offsetSaved; - - /* Return the last literals size */ - return (size_t)(iend - anchor); -} - - -size_t ZSTD_compressBlock_doubleFast( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize) -{ - const U32 mls = ms->cParams.minMatch; - switch(mls) - { - default: /* includes case 3 */ - case 4 : - return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 4, ZSTD_noDict); - case 5 : - return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 5, ZSTD_noDict); - case 6 : - return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 6, ZSTD_noDict); - case 7 : - return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 7, ZSTD_noDict); - } -} - - -size_t ZSTD_compressBlock_doubleFast_dictMatchState( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize) -{ - const U32 mls = ms->cParams.minMatch; - switch(mls) - { - default: /* includes case 3 */ - case 4 : - return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 4, ZSTD_dictMatchState); - case 5 : - return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 5, ZSTD_dictMatchState); - case 6 : - return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 6, ZSTD_dictMatchState); - case 7 : - return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 7, ZSTD_dictMatchState); - } -} - - -static size_t ZSTD_compressBlock_doubleFast_extDict_generic( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize, - U32 const mls /* template */) -{ - ZSTD_compressionParameters const* cParams = &ms->cParams; - U32* const hashLong = ms->hashTable; - U32 const hBitsL = cParams->hashLog; - U32* const hashSmall = ms->chainTable; - U32 const hBitsS = cParams->chainLog; - const BYTE* const istart = (const BYTE*)src; - const BYTE* ip = istart; - const BYTE* anchor = istart; - const BYTE* const iend = istart + srcSize; - const BYTE* const ilimit = iend - 8; - const BYTE* const base = ms->window.base; - const U32 endIndex = (U32)((size_t)(istart - base) + srcSize); - const U32 lowLimit = ZSTD_getLowestMatchIndex(ms, endIndex, cParams->windowLog); - const U32 dictStartIndex = lowLimit; - const U32 dictLimit = ms->window.dictLimit; - const U32 prefixStartIndex = (dictLimit > lowLimit) ? dictLimit : lowLimit; - const BYTE* const prefixStart = base + prefixStartIndex; - const BYTE* const dictBase = ms->window.dictBase; - const BYTE* const dictStart = dictBase + dictStartIndex; - const BYTE* const dictEnd = dictBase + prefixStartIndex; - U32 offset_1=rep[0], offset_2=rep[1]; - - DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_extDict_generic (srcSize=%zu)", srcSize); - - /* if extDict is invalidated due to maxDistance, switch to "regular" variant */ - if (prefixStartIndex == dictStartIndex) - return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, mls, ZSTD_noDict); - - /* Search Loop */ - while (ip < ilimit) { /* < instead of <=, because (ip+1) */ - const size_t hSmall = ZSTD_hashPtr(ip, hBitsS, mls); - const U32 matchIndex = hashSmall[hSmall]; - const BYTE* const matchBase = matchIndex < prefixStartIndex ? dictBase : base; - const BYTE* match = matchBase + matchIndex; - - const size_t hLong = ZSTD_hashPtr(ip, hBitsL, 8); - const U32 matchLongIndex = hashLong[hLong]; - const BYTE* const matchLongBase = matchLongIndex < prefixStartIndex ? dictBase : base; - const BYTE* matchLong = matchLongBase + matchLongIndex; - - const U32 current = (U32)(ip-base); - const U32 repIndex = current + 1 - offset_1; /* offset_1 expected <= current +1 */ - const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base; - const BYTE* const repMatch = repBase + repIndex; - size_t mLength; - hashSmall[hSmall] = hashLong[hLong] = current; /* update hash table */ - - if ((((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex doesn't overlap dict + prefix */ - & (repIndex > dictStartIndex)) - && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { - const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend; - mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4; - ip++; - ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH); - } else { - if ((matchLongIndex > dictStartIndex) && (MEM_read64(matchLong) == MEM_read64(ip))) { - const BYTE* const matchEnd = matchLongIndex < prefixStartIndex ? dictEnd : iend; - const BYTE* const lowMatchPtr = matchLongIndex < prefixStartIndex ? dictStart : prefixStart; - U32 offset; - mLength = ZSTD_count_2segments(ip+8, matchLong+8, iend, matchEnd, prefixStart) + 8; - offset = current - matchLongIndex; - while (((ip>anchor) & (matchLong>lowMatchPtr)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */ - offset_2 = offset_1; - offset_1 = offset; - ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH); - - } else if ((matchIndex > dictStartIndex) && (MEM_read32(match) == MEM_read32(ip))) { - size_t const h3 = ZSTD_hashPtr(ip+1, hBitsL, 8); - U32 const matchIndex3 = hashLong[h3]; - const BYTE* const match3Base = matchIndex3 < prefixStartIndex ? dictBase : base; - const BYTE* match3 = match3Base + matchIndex3; - U32 offset; - hashLong[h3] = current + 1; - if ( (matchIndex3 > dictStartIndex) && (MEM_read64(match3) == MEM_read64(ip+1)) ) { - const BYTE* const matchEnd = matchIndex3 < prefixStartIndex ? dictEnd : iend; - const BYTE* const lowMatchPtr = matchIndex3 < prefixStartIndex ? dictStart : prefixStart; - mLength = ZSTD_count_2segments(ip+9, match3+8, iend, matchEnd, prefixStart) + 8; - ip++; - offset = current+1 - matchIndex3; - while (((ip>anchor) & (match3>lowMatchPtr)) && (ip[-1] == match3[-1])) { ip--; match3--; mLength++; } /* catch up */ - } else { - const BYTE* const matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend; - const BYTE* const lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart; - mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4; - offset = current - matchIndex; - while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ - } - offset_2 = offset_1; - offset_1 = offset; - ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH); - - } else { - ip += ((ip-anchor) >> kSearchStrength) + 1; - continue; - } } - - /* move to next sequence start */ - ip += mLength; - anchor = ip; - - if (ip <= ilimit) { - /* Complementary insertion */ - /* done after iLimit test, as candidates could be > iend-8 */ - { U32 const indexToInsert = current+2; - hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert; - hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base); - hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert; - hashSmall[ZSTD_hashPtr(ip-1, hBitsS, mls)] = (U32)(ip-1-base); - } - - /* check immediate repcode */ - while (ip <= ilimit) { - U32 const current2 = (U32)(ip-base); - U32 const repIndex2 = current2 - offset_2; - const BYTE* repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2; - if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) /* intentional overflow : ensure repIndex2 doesn't overlap dict + prefix */ - & (repIndex2 > dictStartIndex)) - && (MEM_read32(repMatch2) == MEM_read32(ip)) ) { - const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend; - size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4; - U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */ - ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH); - hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2; - hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2; - ip += repLength2; - anchor = ip; - continue; - } - break; - } } } - - /* save reps for next block */ - rep[0] = offset_1; - rep[1] = offset_2; - - /* Return the last literals size */ - return (size_t)(iend - anchor); -} - - -size_t ZSTD_compressBlock_doubleFast_extDict( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize) -{ - U32 const mls = ms->cParams.minMatch; - switch(mls) - { - default: /* includes case 3 */ - case 4 : - return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 4); - case 5 : - return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 5); - case 6 : - return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 6); - case 7 : - return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 7); - } -} - -} - - -// LICENSE_CHANGE_END - - -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #4 -// See the end of this file for a list - -/* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. - */ - - /* ZSTD_hashPtr, ZSTD_count, ZSTD_storeSeq */ - - -namespace duckdb_zstd { - -void ZSTD_fillHashTable(ZSTD_matchState_t* ms, - const void* const end, - ZSTD_dictTableLoadMethod_e dtlm) -{ - const ZSTD_compressionParameters* const cParams = &ms->cParams; - U32* const hashTable = ms->hashTable; - U32 const hBits = cParams->hashLog; - U32 const mls = cParams->minMatch; - const BYTE* const base = ms->window.base; - const BYTE* ip = base + ms->nextToUpdate; - const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE; - const U32 fastHashFillStep = 3; - - /* Always insert every fastHashFillStep position into the hash table. - * Insert the other positions if their hash entry is empty. - */ - for ( ; ip + fastHashFillStep < iend + 2; ip += fastHashFillStep) { - U32 const current = (U32)(ip - base); - size_t const hash0 = ZSTD_hashPtr(ip, hBits, mls); - hashTable[hash0] = current; - if (dtlm == ZSTD_dtlm_fast) continue; - /* Only load extra positions for ZSTD_dtlm_full */ - { U32 p; - for (p = 1; p < fastHashFillStep; ++p) { - size_t const hash = ZSTD_hashPtr(ip + p, hBits, mls); - if (hashTable[hash] == 0) { /* not yet filled */ - hashTable[hash] = current + p; - } } } } -} - - -FORCE_INLINE_TEMPLATE size_t -ZSTD_compressBlock_fast_generic( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize, - U32 const mls) -{ - const ZSTD_compressionParameters* const cParams = &ms->cParams; - U32* const hashTable = ms->hashTable; - U32 const hlog = cParams->hashLog; - /* support stepSize of 0 */ - size_t const stepSize = cParams->targetLength + !(cParams->targetLength) + 1; - const BYTE* const base = ms->window.base; - const BYTE* const istart = (const BYTE*)src; - /* We check ip0 (ip + 0) and ip1 (ip + 1) each loop */ - const BYTE* ip0 = istart; - const BYTE* ip1; - const BYTE* anchor = istart; - const U32 endIndex = (U32)((size_t)(istart - base) + srcSize); - const U32 prefixStartIndex = ZSTD_getLowestPrefixIndex(ms, endIndex, cParams->windowLog); - const BYTE* const prefixStart = base + prefixStartIndex; - const BYTE* const iend = istart + srcSize; - const BYTE* const ilimit = iend - HASH_READ_SIZE; - U32 offset_1=rep[0], offset_2=rep[1]; - U32 offsetSaved = 0; - - /* init */ - DEBUGLOG(5, "ZSTD_compressBlock_fast_generic"); - ip0 += (ip0 == prefixStart); - ip1 = ip0 + 1; - { U32 const current = (U32)(ip0 - base); - U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, current, cParams->windowLog); - U32 const maxRep = current - windowLow; - if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0; - if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0; - } - - /* Main Search Loop */ -#ifdef __INTEL_COMPILER - /* From intel 'The vector pragma indicates that the loop should be - * vectorized if it is legal to do so'. Can be used together with - * #pragma ivdep (but have opted to exclude that because intel - * warns against using it).*/ - #pragma vector always -#endif - while (ip1 < ilimit) { /* < instead of <=, because check at ip0+2 */ - size_t mLength; - BYTE const* ip2 = ip0 + 2; - size_t const h0 = ZSTD_hashPtr(ip0, hlog, mls); - U32 const val0 = MEM_read32(ip0); - size_t const h1 = ZSTD_hashPtr(ip1, hlog, mls); - U32 const val1 = MEM_read32(ip1); - U32 const current0 = (U32)(ip0-base); - U32 const current1 = (U32)(ip1-base); - U32 const matchIndex0 = hashTable[h0]; - U32 const matchIndex1 = hashTable[h1]; - BYTE const* repMatch = ip2 - offset_1; - const BYTE* match0 = base + matchIndex0; - const BYTE* match1 = base + matchIndex1; - U32 offcode; - -#if defined(__aarch64__) - PREFETCH_L1(ip0+256); -#endif - - hashTable[h0] = current0; /* update hash table */ - hashTable[h1] = current1; /* update hash table */ - - assert(ip0 + 1 == ip1); - - if ((offset_1 > 0) & (MEM_read32(repMatch) == MEM_read32(ip2))) { - mLength = (ip2[-1] == repMatch[-1]) ? 1 : 0; - ip0 = ip2 - mLength; - match0 = repMatch - mLength; - mLength += 4; - offcode = 0; - goto _match; - } - if ((matchIndex0 > prefixStartIndex) && MEM_read32(match0) == val0) { - /* found a regular match */ - goto _offset; - } - if ((matchIndex1 > prefixStartIndex) && MEM_read32(match1) == val1) { - /* found a regular match after one literal */ - ip0 = ip1; - match0 = match1; - goto _offset; - } - { size_t const step = ((size_t)(ip0-anchor) >> (kSearchStrength - 1)) + stepSize; - assert(step >= 2); - ip0 += step; - ip1 += step; - continue; - } -_offset: /* Requires: ip0, match0 */ - /* Compute the offset code */ - offset_2 = offset_1; - offset_1 = (U32)(ip0-match0); - offcode = offset_1 + ZSTD_REP_MOVE; - mLength = 4; - /* Count the backwards match length */ - while (((ip0>anchor) & (match0>prefixStart)) - && (ip0[-1] == match0[-1])) { ip0--; match0--; mLength++; } /* catch up */ - -_match: /* Requires: ip0, match0, offcode */ - /* Count the forward length */ - mLength += ZSTD_count(ip0+mLength, match0+mLength, iend); - ZSTD_storeSeq(seqStore, (size_t)(ip0-anchor), anchor, iend, offcode, mLength-MINMATCH); - /* match found */ - ip0 += mLength; - anchor = ip0; - - if (ip0 <= ilimit) { - /* Fill Table */ - assert(base+current0+2 > istart); /* check base overflow */ - hashTable[ZSTD_hashPtr(base+current0+2, hlog, mls)] = current0+2; /* here because current+2 could be > iend-8 */ - hashTable[ZSTD_hashPtr(ip0-2, hlog, mls)] = (U32)(ip0-2-base); - - if (offset_2 > 0) { /* offset_2==0 means offset_2 is invalidated */ - while ( (ip0 <= ilimit) && (MEM_read32(ip0) == MEM_read32(ip0 - offset_2)) ) { - /* store sequence */ - size_t const rLength = ZSTD_count(ip0+4, ip0+4-offset_2, iend) + 4; - { U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; } /* swap offset_2 <=> offset_1 */ - hashTable[ZSTD_hashPtr(ip0, hlog, mls)] = (U32)(ip0-base); - ip0 += rLength; - ZSTD_storeSeq(seqStore, 0 /*litLen*/, anchor, iend, 0 /*offCode*/, rLength-MINMATCH); - anchor = ip0; - continue; /* faster when present (confirmed on gcc-8) ... (?) */ - } } } - ip1 = ip0 + 1; - } - - /* save reps for next block */ - rep[0] = offset_1 ? offset_1 : offsetSaved; - rep[1] = offset_2 ? offset_2 : offsetSaved; - - /* Return the last literals size */ - return (size_t)(iend - anchor); -} - - -size_t ZSTD_compressBlock_fast( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize) -{ - U32 const mls = ms->cParams.minMatch; - assert(ms->dictMatchState == NULL); - switch(mls) - { - default: /* includes case 3 */ - case 4 : - return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 4); - case 5 : - return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 5); - case 6 : - return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 6); - case 7 : - return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 7); - } -} - -FORCE_INLINE_TEMPLATE -size_t ZSTD_compressBlock_fast_dictMatchState_generic( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize, U32 const mls) -{ - const ZSTD_compressionParameters* const cParams = &ms->cParams; - U32* const hashTable = ms->hashTable; - U32 const hlog = cParams->hashLog; - /* support stepSize of 0 */ - U32 const stepSize = cParams->targetLength + !(cParams->targetLength); - const BYTE* const base = ms->window.base; - const BYTE* const istart = (const BYTE*)src; - const BYTE* ip = istart; - const BYTE* anchor = istart; - const U32 prefixStartIndex = ms->window.dictLimit; - const BYTE* const prefixStart = base + prefixStartIndex; - const BYTE* const iend = istart + srcSize; - const BYTE* const ilimit = iend - HASH_READ_SIZE; - U32 offset_1=rep[0], offset_2=rep[1]; - U32 offsetSaved = 0; - - const ZSTD_matchState_t* const dms = ms->dictMatchState; - const ZSTD_compressionParameters* const dictCParams = &dms->cParams ; - const U32* const dictHashTable = dms->hashTable; - const U32 dictStartIndex = dms->window.dictLimit; - const BYTE* const dictBase = dms->window.base; - const BYTE* const dictStart = dictBase + dictStartIndex; - const BYTE* const dictEnd = dms->window.nextSrc; - const U32 dictIndexDelta = prefixStartIndex - (U32)(dictEnd - dictBase); - const U32 dictAndPrefixLength = (U32)(ip - prefixStart + dictEnd - dictStart); - const U32 dictHLog = dictCParams->hashLog; - - /* if a dictionary is still attached, it necessarily means that - * it is within window size. So we just check it. */ - const U32 maxDistance = 1U << cParams->windowLog; - const U32 endIndex = (U32)((size_t)(ip - base) + srcSize); - assert(endIndex - prefixStartIndex <= maxDistance); - (void)maxDistance; (void)endIndex; /* these variables are not used when assert() is disabled */ - - /* ensure there will be no no underflow - * when translating a dict index into a local index */ - assert(prefixStartIndex >= (U32)(dictEnd - dictBase)); - - /* init */ - DEBUGLOG(5, "ZSTD_compressBlock_fast_dictMatchState_generic"); - ip += (dictAndPrefixLength == 0); - /* dictMatchState repCode checks don't currently handle repCode == 0 - * disabling. */ - assert(offset_1 <= dictAndPrefixLength); - assert(offset_2 <= dictAndPrefixLength); - - /* Main Search Loop */ - while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */ - size_t mLength; - size_t const h = ZSTD_hashPtr(ip, hlog, mls); - U32 const current = (U32)(ip-base); - U32 const matchIndex = hashTable[h]; - const BYTE* match = base + matchIndex; - const U32 repIndex = current + 1 - offset_1; - const BYTE* repMatch = (repIndex < prefixStartIndex) ? - dictBase + (repIndex - dictIndexDelta) : - base + repIndex; - hashTable[h] = current; /* update hash table */ - - if ( ((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex isn't overlapping dict + prefix */ - && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { - const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend; - mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4; - ip++; - ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH); - } else if ( (matchIndex <= prefixStartIndex) ) { - size_t const dictHash = ZSTD_hashPtr(ip, dictHLog, mls); - U32 const dictMatchIndex = dictHashTable[dictHash]; - const BYTE* dictMatch = dictBase + dictMatchIndex; - if (dictMatchIndex <= dictStartIndex || - MEM_read32(dictMatch) != MEM_read32(ip)) { - assert(stepSize >= 1); - ip += ((ip-anchor) >> kSearchStrength) + stepSize; - continue; - } else { - /* found a dict match */ - U32 const offset = (U32)(current-dictMatchIndex-dictIndexDelta); - mLength = ZSTD_count_2segments(ip+4, dictMatch+4, iend, dictEnd, prefixStart) + 4; - while (((ip>anchor) & (dictMatch>dictStart)) - && (ip[-1] == dictMatch[-1])) { - ip--; dictMatch--; mLength++; - } /* catch up */ - offset_2 = offset_1; - offset_1 = offset; - ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH); - } - } else if (MEM_read32(match) != MEM_read32(ip)) { - /* it's not a match, and we're not going to check the dictionary */ - assert(stepSize >= 1); - ip += ((ip-anchor) >> kSearchStrength) + stepSize; - continue; - } else { - /* found a regular match */ - U32 const offset = (U32)(ip-match); - mLength = ZSTD_count(ip+4, match+4, iend) + 4; - while (((ip>anchor) & (match>prefixStart)) - && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ - offset_2 = offset_1; - offset_1 = offset; - ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH); - } - - /* match found */ - ip += mLength; - anchor = ip; - - if (ip <= ilimit) { - /* Fill Table */ - assert(base+current+2 > istart); /* check base overflow */ - hashTable[ZSTD_hashPtr(base+current+2, hlog, mls)] = current+2; /* here because current+2 could be > iend-8 */ - hashTable[ZSTD_hashPtr(ip-2, hlog, mls)] = (U32)(ip-2-base); - - /* check immediate repcode */ - while (ip <= ilimit) { - U32 const current2 = (U32)(ip-base); - U32 const repIndex2 = current2 - offset_2; - const BYTE* repMatch2 = repIndex2 < prefixStartIndex ? - dictBase - dictIndexDelta + repIndex2 : - base + repIndex2; - if ( ((U32)((prefixStartIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */) - && (MEM_read32(repMatch2) == MEM_read32(ip)) ) { - const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend; - size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4; - U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */ - ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH); - hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2; - ip += repLength2; - anchor = ip; - continue; - } - break; - } - } - } - - /* save reps for next block */ - rep[0] = offset_1 ? offset_1 : offsetSaved; - rep[1] = offset_2 ? offset_2 : offsetSaved; - - /* Return the last literals size */ - return (size_t)(iend - anchor); -} - -size_t ZSTD_compressBlock_fast_dictMatchState( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize) -{ - U32 const mls = ms->cParams.minMatch; - assert(ms->dictMatchState != NULL); - switch(mls) - { - default: /* includes case 3 */ - case 4 : - return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 4); - case 5 : - return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 5); - case 6 : - return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 6); - case 7 : - return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 7); - } -} - - -static size_t ZSTD_compressBlock_fast_extDict_generic( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize, U32 const mls) -{ - const ZSTD_compressionParameters* const cParams = &ms->cParams; - U32* const hashTable = ms->hashTable; - U32 const hlog = cParams->hashLog; - /* support stepSize of 0 */ - U32 const stepSize = cParams->targetLength + !(cParams->targetLength); - const BYTE* const base = ms->window.base; - const BYTE* const dictBase = ms->window.dictBase; - const BYTE* const istart = (const BYTE*)src; - const BYTE* ip = istart; - const BYTE* anchor = istart; - const U32 endIndex = (U32)((size_t)(istart - base) + srcSize); - const U32 lowLimit = ZSTD_getLowestMatchIndex(ms, endIndex, cParams->windowLog); - const U32 dictStartIndex = lowLimit; - const BYTE* const dictStart = dictBase + dictStartIndex; - const U32 dictLimit = ms->window.dictLimit; - const U32 prefixStartIndex = dictLimit < lowLimit ? lowLimit : dictLimit; - const BYTE* const prefixStart = base + prefixStartIndex; - const BYTE* const dictEnd = dictBase + prefixStartIndex; - const BYTE* const iend = istart + srcSize; - const BYTE* const ilimit = iend - 8; - U32 offset_1=rep[0], offset_2=rep[1]; - - DEBUGLOG(5, "ZSTD_compressBlock_fast_extDict_generic (offset_1=%u)", offset_1); - - /* switch to "regular" variant if extDict is invalidated due to maxDistance */ - if (prefixStartIndex == dictStartIndex) - return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, mls); - - /* Search Loop */ - while (ip < ilimit) { /* < instead of <=, because (ip+1) */ - const size_t h = ZSTD_hashPtr(ip, hlog, mls); - const U32 matchIndex = hashTable[h]; - const BYTE* const matchBase = matchIndex < prefixStartIndex ? dictBase : base; - const BYTE* match = matchBase + matchIndex; - const U32 current = (U32)(ip-base); - const U32 repIndex = current + 1 - offset_1; - const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base; - const BYTE* const repMatch = repBase + repIndex; - hashTable[h] = current; /* update hash table */ - DEBUGLOG(7, "offset_1 = %u , current = %u", offset_1, current); - assert(offset_1 <= current +1); /* check repIndex */ - - if ( (((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow */ & (repIndex > dictStartIndex)) - && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { - const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend; - size_t const rLength = ZSTD_count_2segments(ip+1 +4, repMatch +4, iend, repMatchEnd, prefixStart) + 4; - ip++; - ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, rLength-MINMATCH); - ip += rLength; - anchor = ip; - } else { - if ( (matchIndex < dictStartIndex) || - (MEM_read32(match) != MEM_read32(ip)) ) { - assert(stepSize >= 1); - ip += ((ip-anchor) >> kSearchStrength) + stepSize; - continue; - } - { const BYTE* const matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend; - const BYTE* const lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart; - U32 const offset = current - matchIndex; - size_t mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4; - while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ - offset_2 = offset_1; offset_1 = offset; /* update offset history */ - ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH); - ip += mLength; - anchor = ip; - } } - - if (ip <= ilimit) { - /* Fill Table */ - hashTable[ZSTD_hashPtr(base+current+2, hlog, mls)] = current+2; - hashTable[ZSTD_hashPtr(ip-2, hlog, mls)] = (U32)(ip-2-base); - /* check immediate repcode */ - while (ip <= ilimit) { - U32 const current2 = (U32)(ip-base); - U32 const repIndex2 = current2 - offset_2; - const BYTE* const repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2; - if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) & (repIndex2 > dictStartIndex)) /* intentional overflow */ - && (MEM_read32(repMatch2) == MEM_read32(ip)) ) { - const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend; - size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4; - { U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; } /* swap offset_2 <=> offset_1 */ - ZSTD_storeSeq(seqStore, 0 /*litlen*/, anchor, iend, 0 /*offcode*/, repLength2-MINMATCH); - hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2; - ip += repLength2; - anchor = ip; - continue; - } - break; - } } } - - /* save reps for next block */ - rep[0] = offset_1; - rep[1] = offset_2; - - /* Return the last literals size */ - return (size_t)(iend - anchor); -} - - -size_t ZSTD_compressBlock_fast_extDict( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize) -{ - U32 const mls = ms->cParams.minMatch; - switch(mls) - { - default: /* includes case 3 */ - case 4 : - return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 4); - case 5 : - return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 5); - case 6 : - return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 6); - case 7 : - return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 7); - } -} - -} - - -// LICENSE_CHANGE_END - - -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #4 -// See the end of this file for a list - -/* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. - */ - - - - - -/*-************************************* -* Binary Tree search -***************************************/ - -namespace duckdb_zstd { - -static void -ZSTD_updateDUBT(ZSTD_matchState_t* ms, - const BYTE* ip, const BYTE* iend, - U32 mls) -{ - const ZSTD_compressionParameters* const cParams = &ms->cParams; - U32* const hashTable = ms->hashTable; - U32 const hashLog = cParams->hashLog; - - U32* const bt = ms->chainTable; - U32 const btLog = cParams->chainLog - 1; - U32 const btMask = (1 << btLog) - 1; - - const BYTE* const base = ms->window.base; - U32 const target = (U32)(ip - base); - U32 idx = ms->nextToUpdate; - - if (idx != target) - DEBUGLOG(7, "ZSTD_updateDUBT, from %u to %u (dictLimit:%u)", - idx, target, ms->window.dictLimit); - assert(ip + 8 <= iend); /* condition for ZSTD_hashPtr */ - (void)iend; - - assert(idx >= ms->window.dictLimit); /* condition for valid base+idx */ - for ( ; idx < target ; idx++) { - size_t const h = ZSTD_hashPtr(base + idx, hashLog, mls); /* assumption : ip + 8 <= iend */ - U32 const matchIndex = hashTable[h]; - - U32* const nextCandidatePtr = bt + 2*(idx&btMask); - U32* const sortMarkPtr = nextCandidatePtr + 1; - - DEBUGLOG(8, "ZSTD_updateDUBT: insert %u", idx); - hashTable[h] = idx; /* Update Hash Table */ - *nextCandidatePtr = matchIndex; /* update BT like a chain */ - *sortMarkPtr = ZSTD_DUBT_UNSORTED_MARK; - } - ms->nextToUpdate = target; -} - - -/** ZSTD_insertDUBT1() : - * sort one already inserted but unsorted position - * assumption : current >= btlow == (current - btmask) - * doesn't fail */ -static void -ZSTD_insertDUBT1(ZSTD_matchState_t* ms, - U32 current, const BYTE* inputEnd, - U32 nbCompares, U32 btLow, - const ZSTD_dictMode_e dictMode) -{ - const ZSTD_compressionParameters* const cParams = &ms->cParams; - U32* const bt = ms->chainTable; - U32 const btLog = cParams->chainLog - 1; - U32 const btMask = (1 << btLog) - 1; - size_t commonLengthSmaller=0, commonLengthLarger=0; - const BYTE* const base = ms->window.base; - const BYTE* const dictBase = ms->window.dictBase; - const U32 dictLimit = ms->window.dictLimit; - const BYTE* const ip = (current>=dictLimit) ? base + current : dictBase + current; - const BYTE* const iend = (current>=dictLimit) ? inputEnd : dictBase + dictLimit; - const BYTE* const dictEnd = dictBase + dictLimit; - const BYTE* const prefixStart = base + dictLimit; - const BYTE* match; - U32* smallerPtr = bt + 2*(current&btMask); - U32* largerPtr = smallerPtr + 1; - U32 matchIndex = *smallerPtr; /* this candidate is unsorted : next sorted candidate is reached through *smallerPtr, while *largerPtr contains previous unsorted candidate (which is already saved and can be overwritten) */ - U32 dummy32; /* to be nullified at the end */ - U32 const windowValid = ms->window.lowLimit; - U32 const maxDistance = 1U << cParams->windowLog; - U32 const windowLow = (current - windowValid > maxDistance) ? current - maxDistance : windowValid; - - - DEBUGLOG(8, "ZSTD_insertDUBT1(%u) (dictLimit=%u, lowLimit=%u)", - current, dictLimit, windowLow); - assert(current >= btLow); - assert(ip < iend); /* condition for ZSTD_count */ - - while (nbCompares-- && (matchIndex > windowLow)) { - U32* const nextPtr = bt + 2*(matchIndex & btMask); - size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ - assert(matchIndex < current); - /* note : all candidates are now supposed sorted, - * but it's still possible to have nextPtr[1] == ZSTD_DUBT_UNSORTED_MARK - * when a real index has the same value as ZSTD_DUBT_UNSORTED_MARK */ - - if ( (dictMode != ZSTD_extDict) - || (matchIndex+matchLength >= dictLimit) /* both in current segment*/ - || (current < dictLimit) /* both in extDict */) { - const BYTE* const mBase = ( (dictMode != ZSTD_extDict) - || (matchIndex+matchLength >= dictLimit)) ? - base : dictBase; - assert( (matchIndex+matchLength >= dictLimit) /* might be wrong if extDict is incorrectly set to 0 */ - || (current < dictLimit) ); - match = mBase + matchIndex; - matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend); - } else { - match = dictBase + matchIndex; - matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart); - if (matchIndex+matchLength >= dictLimit) - match = base + matchIndex; /* preparation for next read of match[matchLength] */ - } - - DEBUGLOG(8, "ZSTD_insertDUBT1: comparing %u with %u : found %u common bytes ", - current, matchIndex, (U32)matchLength); - - if (ip+matchLength == iend) { /* equal : no way to know if inf or sup */ - break; /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt tree */ - } - - if (match[matchLength] < ip[matchLength]) { /* necessarily within buffer */ - /* match is smaller than current */ - *smallerPtr = matchIndex; /* update smaller idx */ - commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */ - if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop searching */ - DEBUGLOG(8, "ZSTD_insertDUBT1: %u (>btLow=%u) is smaller : next => %u", - matchIndex, btLow, nextPtr[1]); - smallerPtr = nextPtr+1; /* new "candidate" => larger than match, which was smaller than target */ - matchIndex = nextPtr[1]; /* new matchIndex, larger than previous and closer to current */ - } else { - /* match is larger than current */ - *largerPtr = matchIndex; - commonLengthLarger = matchLength; - if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop searching */ - DEBUGLOG(8, "ZSTD_insertDUBT1: %u (>btLow=%u) is larger => %u", - matchIndex, btLow, nextPtr[0]); - largerPtr = nextPtr; - matchIndex = nextPtr[0]; - } } - - *smallerPtr = *largerPtr = 0; -} - - -static size_t -ZSTD_DUBT_findBetterDictMatch ( - ZSTD_matchState_t* ms, - const BYTE* const ip, const BYTE* const iend, - size_t* offsetPtr, - size_t bestLength, - U32 nbCompares, - U32 const mls, - const ZSTD_dictMode_e dictMode) -{ - const ZSTD_matchState_t * const dms = ms->dictMatchState; - const ZSTD_compressionParameters* const dmsCParams = &dms->cParams; - const U32 * const dictHashTable = dms->hashTable; - U32 const hashLog = dmsCParams->hashLog; - size_t const h = ZSTD_hashPtr(ip, hashLog, mls); - U32 dictMatchIndex = dictHashTable[h]; - - const BYTE* const base = ms->window.base; - const BYTE* const prefixStart = base + ms->window.dictLimit; - U32 const current = (U32)(ip-base); - const BYTE* const dictBase = dms->window.base; - const BYTE* const dictEnd = dms->window.nextSrc; - U32 const dictHighLimit = (U32)(dms->window.nextSrc - dms->window.base); - U32 const dictLowLimit = dms->window.lowLimit; - U32 const dictIndexDelta = ms->window.lowLimit - dictHighLimit; - - U32* const dictBt = dms->chainTable; - U32 const btLog = dmsCParams->chainLog - 1; - U32 const btMask = (1 << btLog) - 1; - U32 const btLow = (btMask >= dictHighLimit - dictLowLimit) ? dictLowLimit : dictHighLimit - btMask; - - size_t commonLengthSmaller=0, commonLengthLarger=0; - - (void)dictMode; - assert(dictMode == ZSTD_dictMatchState); - - while (nbCompares-- && (dictMatchIndex > dictLowLimit)) { - U32* const nextPtr = dictBt + 2*(dictMatchIndex & btMask); - size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ - const BYTE* match = dictBase + dictMatchIndex; - matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart); - if (dictMatchIndex+matchLength >= dictHighLimit) - match = base + dictMatchIndex + dictIndexDelta; /* to prepare for next usage of match[matchLength] */ - - if (matchLength > bestLength) { - U32 matchIndex = dictMatchIndex + dictIndexDelta; - if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(current-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) ) { - DEBUGLOG(9, "ZSTD_DUBT_findBetterDictMatch(%u) : found better match length %u -> %u and offsetCode %u -> %u (dictMatchIndex %u, matchIndex %u)", - current, (U32)bestLength, (U32)matchLength, (U32)*offsetPtr, ZSTD_REP_MOVE + current - matchIndex, dictMatchIndex, matchIndex); - bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + current - matchIndex; - } - if (ip+matchLength == iend) { /* reached end of input : ip[matchLength] is not valid, no way to know if it's larger or smaller than match */ - break; /* drop, to guarantee consistency (miss a little bit of compression) */ - } - } - - if (match[matchLength] < ip[matchLength]) { - if (dictMatchIndex <= btLow) { break; } /* beyond tree size, stop the search */ - commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */ - dictMatchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */ - } else { - /* match is larger than current */ - if (dictMatchIndex <= btLow) { break; } /* beyond tree size, stop the search */ - commonLengthLarger = matchLength; - dictMatchIndex = nextPtr[0]; - } - } - - if (bestLength >= MINMATCH) { - U32 const mIndex = current - ((U32)*offsetPtr - ZSTD_REP_MOVE); (void)mIndex; - DEBUGLOG(8, "ZSTD_DUBT_findBetterDictMatch(%u) : found match of length %u and offsetCode %u (pos %u)", - current, (U32)bestLength, (U32)*offsetPtr, mIndex); - } - return bestLength; - -} - - -static size_t -ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms, - const BYTE* const ip, const BYTE* const iend, - size_t* offsetPtr, - U32 const mls, - const ZSTD_dictMode_e dictMode) -{ - const ZSTD_compressionParameters* const cParams = &ms->cParams; - U32* const hashTable = ms->hashTable; - U32 const hashLog = cParams->hashLog; - size_t const h = ZSTD_hashPtr(ip, hashLog, mls); - U32 matchIndex = hashTable[h]; - - const BYTE* const base = ms->window.base; - U32 const current = (U32)(ip-base); - U32 const windowLow = ZSTD_getLowestMatchIndex(ms, current, cParams->windowLog); - - U32* const bt = ms->chainTable; - U32 const btLog = cParams->chainLog - 1; - U32 const btMask = (1 << btLog) - 1; - U32 const btLow = (btMask >= current) ? 0 : current - btMask; - U32 const unsortLimit = MAX(btLow, windowLow); - - U32* nextCandidate = bt + 2*(matchIndex&btMask); - U32* unsortedMark = bt + 2*(matchIndex&btMask) + 1; - U32 nbCompares = 1U << cParams->searchLog; - U32 nbCandidates = nbCompares; - U32 previousCandidate = 0; - - DEBUGLOG(7, "ZSTD_DUBT_findBestMatch (%u) ", current); - assert(ip <= iend-8); /* required for h calculation */ - - /* reach end of unsorted candidates list */ - while ( (matchIndex > unsortLimit) - && (*unsortedMark == ZSTD_DUBT_UNSORTED_MARK) - && (nbCandidates > 1) ) { - DEBUGLOG(8, "ZSTD_DUBT_findBestMatch: candidate %u is unsorted", - matchIndex); - *unsortedMark = previousCandidate; /* the unsortedMark becomes a reversed chain, to move up back to original position */ - previousCandidate = matchIndex; - matchIndex = *nextCandidate; - nextCandidate = bt + 2*(matchIndex&btMask); - unsortedMark = bt + 2*(matchIndex&btMask) + 1; - nbCandidates --; - } - - /* nullify last candidate if it's still unsorted - * simplification, detrimental to compression ratio, beneficial for speed */ - if ( (matchIndex > unsortLimit) - && (*unsortedMark==ZSTD_DUBT_UNSORTED_MARK) ) { - DEBUGLOG(7, "ZSTD_DUBT_findBestMatch: nullify last unsorted candidate %u", - matchIndex); - *nextCandidate = *unsortedMark = 0; - } - - /* batch sort stacked candidates */ - matchIndex = previousCandidate; - while (matchIndex) { /* will end on matchIndex == 0 */ - U32* const nextCandidateIdxPtr = bt + 2*(matchIndex&btMask) + 1; - U32 const nextCandidateIdx = *nextCandidateIdxPtr; - ZSTD_insertDUBT1(ms, matchIndex, iend, - nbCandidates, unsortLimit, dictMode); - matchIndex = nextCandidateIdx; - nbCandidates++; - } - - /* find longest match */ - { size_t commonLengthSmaller = 0, commonLengthLarger = 0; - const BYTE* const dictBase = ms->window.dictBase; - const U32 dictLimit = ms->window.dictLimit; - const BYTE* const dictEnd = dictBase + dictLimit; - const BYTE* const prefixStart = base + dictLimit; - U32* smallerPtr = bt + 2*(current&btMask); - U32* largerPtr = bt + 2*(current&btMask) + 1; - U32 matchEndIdx = current + 8 + 1; - U32 dummy32; /* to be nullified at the end */ - size_t bestLength = 0; - - matchIndex = hashTable[h]; - hashTable[h] = current; /* Update Hash Table */ - - while (nbCompares-- && (matchIndex > windowLow)) { - U32* const nextPtr = bt + 2*(matchIndex & btMask); - size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ - const BYTE* match; - - if ((dictMode != ZSTD_extDict) || (matchIndex+matchLength >= dictLimit)) { - match = base + matchIndex; - matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend); - } else { - match = dictBase + matchIndex; - matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart); - if (matchIndex+matchLength >= dictLimit) - match = base + matchIndex; /* to prepare for next usage of match[matchLength] */ - } - - if (matchLength > bestLength) { - if (matchLength > matchEndIdx - matchIndex) - matchEndIdx = matchIndex + (U32)matchLength; - if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(current-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) ) - bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + current - matchIndex; - if (ip+matchLength == iend) { /* equal : no way to know if inf or sup */ - if (dictMode == ZSTD_dictMatchState) { - nbCompares = 0; /* in addition to avoiding checking any - * further in this loop, make sure we - * skip checking in the dictionary. */ - } - break; /* drop, to guarantee consistency (miss a little bit of compression) */ - } - } - - if (match[matchLength] < ip[matchLength]) { - /* match is smaller than current */ - *smallerPtr = matchIndex; /* update smaller idx */ - commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */ - if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */ - smallerPtr = nextPtr+1; /* new "smaller" => larger of match */ - matchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */ - } else { - /* match is larger than current */ - *largerPtr = matchIndex; - commonLengthLarger = matchLength; - if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */ - largerPtr = nextPtr; - matchIndex = nextPtr[0]; - } } - - *smallerPtr = *largerPtr = 0; - - if (dictMode == ZSTD_dictMatchState && nbCompares) { - bestLength = ZSTD_DUBT_findBetterDictMatch( - ms, ip, iend, - offsetPtr, bestLength, nbCompares, - mls, dictMode); - } - - assert(matchEndIdx > current+8); /* ensure nextToUpdate is increased */ - ms->nextToUpdate = matchEndIdx - 8; /* skip repetitive patterns */ - if (bestLength >= MINMATCH) { - U32 const mIndex = current - ((U32)*offsetPtr - ZSTD_REP_MOVE); (void)mIndex; - DEBUGLOG(8, "ZSTD_DUBT_findBestMatch(%u) : found match of length %u and offsetCode %u (pos %u)", - current, (U32)bestLength, (U32)*offsetPtr, mIndex); - } - return bestLength; - } -} - - -/** ZSTD_BtFindBestMatch() : Tree updater, providing best match */ -FORCE_INLINE_TEMPLATE size_t -ZSTD_BtFindBestMatch( ZSTD_matchState_t* ms, - const BYTE* const ip, const BYTE* const iLimit, - size_t* offsetPtr, - const U32 mls /* template */, - const ZSTD_dictMode_e dictMode) -{ - DEBUGLOG(7, "ZSTD_BtFindBestMatch"); - if (ip < ms->window.base + ms->nextToUpdate) return 0; /* skipped area */ - ZSTD_updateDUBT(ms, ip, iLimit, mls); - return ZSTD_DUBT_findBestMatch(ms, ip, iLimit, offsetPtr, mls, dictMode); -} - - -static size_t -ZSTD_BtFindBestMatch_selectMLS ( ZSTD_matchState_t* ms, - const BYTE* ip, const BYTE* const iLimit, - size_t* offsetPtr) -{ - switch(ms->cParams.minMatch) - { - default : /* includes case 3 */ - case 4 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 4, ZSTD_noDict); - case 5 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 5, ZSTD_noDict); - case 7 : - case 6 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 6, ZSTD_noDict); - } -} - - -static size_t ZSTD_BtFindBestMatch_dictMatchState_selectMLS ( - ZSTD_matchState_t* ms, - const BYTE* ip, const BYTE* const iLimit, - size_t* offsetPtr) -{ - switch(ms->cParams.minMatch) - { - default : /* includes case 3 */ - case 4 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 4, ZSTD_dictMatchState); - case 5 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 5, ZSTD_dictMatchState); - case 7 : - case 6 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 6, ZSTD_dictMatchState); - } -} - - -static size_t ZSTD_BtFindBestMatch_extDict_selectMLS ( - ZSTD_matchState_t* ms, - const BYTE* ip, const BYTE* const iLimit, - size_t* offsetPtr) -{ - switch(ms->cParams.minMatch) - { - default : /* includes case 3 */ - case 4 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 4, ZSTD_extDict); - case 5 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 5, ZSTD_extDict); - case 7 : - case 6 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 6, ZSTD_extDict); - } -} - - - -/* ********************************* -* Hash Chain -***********************************/ -#define NEXT_IN_CHAIN(d, mask) chainTable[(d) & (mask)] - -/* Update chains up to ip (excluded) - Assumption : always within prefix (i.e. not within extDict) */ -static U32 ZSTD_insertAndFindFirstIndex_internal( - ZSTD_matchState_t* ms, - const ZSTD_compressionParameters* const cParams, - const BYTE* ip, U32 const mls) -{ - U32* const hashTable = ms->hashTable; - const U32 hashLog = cParams->hashLog; - U32* const chainTable = ms->chainTable; - const U32 chainMask = (1 << cParams->chainLog) - 1; - const BYTE* const base = ms->window.base; - const U32 target = (U32)(ip - base); - U32 idx = ms->nextToUpdate; - - while(idx < target) { /* catch up */ - size_t const h = ZSTD_hashPtr(base+idx, hashLog, mls); - NEXT_IN_CHAIN(idx, chainMask) = hashTable[h]; - hashTable[h] = idx; - idx++; - } - - ms->nextToUpdate = target; - return hashTable[ZSTD_hashPtr(ip, hashLog, mls)]; -} - -U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip) { - const ZSTD_compressionParameters* const cParams = &ms->cParams; - return ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, ms->cParams.minMatch); -} - - -/* inlining is important to hardwire a hot branch (template emulation) */ -FORCE_INLINE_TEMPLATE -size_t ZSTD_HcFindBestMatch_generic ( - ZSTD_matchState_t* ms, - const BYTE* const ip, const BYTE* const iLimit, - size_t* offsetPtr, - const U32 mls, const ZSTD_dictMode_e dictMode) -{ - const ZSTD_compressionParameters* const cParams = &ms->cParams; - U32* const chainTable = ms->chainTable; - const U32 chainSize = (1 << cParams->chainLog); - const U32 chainMask = chainSize-1; - const BYTE* const base = ms->window.base; - const BYTE* const dictBase = ms->window.dictBase; - const U32 dictLimit = ms->window.dictLimit; - const BYTE* const prefixStart = base + dictLimit; - const BYTE* const dictEnd = dictBase + dictLimit; - const U32 current = (U32)(ip-base); - const U32 maxDistance = 1U << cParams->windowLog; - const U32 lowestValid = ms->window.lowLimit; - const U32 withinMaxDistance = (current - lowestValid > maxDistance) ? current - maxDistance : lowestValid; - const U32 isDictionary = (ms->loadedDictEnd != 0); - const U32 lowLimit = isDictionary ? lowestValid : withinMaxDistance; - const U32 minChain = current > chainSize ? current - chainSize : 0; - U32 nbAttempts = 1U << cParams->searchLog; - size_t ml=4-1; - - /* HC4 match finder */ - U32 matchIndex = ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, mls); - - for ( ; (matchIndex>lowLimit) & (nbAttempts>0) ; nbAttempts--) { - size_t currentMl=0; - if ((dictMode != ZSTD_extDict) || matchIndex >= dictLimit) { - const BYTE* const match = base + matchIndex; - assert(matchIndex >= dictLimit); /* ensures this is true if dictMode != ZSTD_extDict */ - if (match[ml] == ip[ml]) /* potentially better */ - currentMl = ZSTD_count(ip, match, iLimit); - } else { - const BYTE* const match = dictBase + matchIndex; - assert(match+4 <= dictEnd); - if (MEM_read32(match) == MEM_read32(ip)) /* assumption : matchIndex <= dictLimit-4 (by table construction) */ - currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, dictEnd, prefixStart) + 4; - } - - /* save best solution */ - if (currentMl > ml) { - ml = currentMl; - *offsetPtr = current - matchIndex + ZSTD_REP_MOVE; - if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */ - } - - if (matchIndex <= minChain) break; - matchIndex = NEXT_IN_CHAIN(matchIndex, chainMask); - } - - if (dictMode == ZSTD_dictMatchState) { - const ZSTD_matchState_t* const dms = ms->dictMatchState; - const U32* const dmsChainTable = dms->chainTable; - const U32 dmsChainSize = (1 << dms->cParams.chainLog); - const U32 dmsChainMask = dmsChainSize - 1; - const U32 dmsLowestIndex = dms->window.dictLimit; - const BYTE* const dmsBase = dms->window.base; - const BYTE* const dmsEnd = dms->window.nextSrc; - const U32 dmsSize = (U32)(dmsEnd - dmsBase); - const U32 dmsIndexDelta = dictLimit - dmsSize; - const U32 dmsMinChain = dmsSize > dmsChainSize ? dmsSize - dmsChainSize : 0; - - matchIndex = dms->hashTable[ZSTD_hashPtr(ip, dms->cParams.hashLog, mls)]; - - for ( ; (matchIndex>dmsLowestIndex) & (nbAttempts>0) ; nbAttempts--) { - size_t currentMl=0; - const BYTE* const match = dmsBase + matchIndex; - assert(match+4 <= dmsEnd); - if (MEM_read32(match) == MEM_read32(ip)) /* assumption : matchIndex <= dictLimit-4 (by table construction) */ - currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, dmsEnd, prefixStart) + 4; - - /* save best solution */ - if (currentMl > ml) { - ml = currentMl; - *offsetPtr = current - (matchIndex + dmsIndexDelta) + ZSTD_REP_MOVE; - if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */ - } - - if (matchIndex <= dmsMinChain) break; - matchIndex = dmsChainTable[matchIndex & dmsChainMask]; - } - } - - return ml; -} - - -FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_selectMLS ( - ZSTD_matchState_t* ms, - const BYTE* ip, const BYTE* const iLimit, - size_t* offsetPtr) -{ - switch(ms->cParams.minMatch) - { - default : /* includes case 3 */ - case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_noDict); - case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_noDict); - case 7 : - case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_noDict); - } -} - - -static size_t ZSTD_HcFindBestMatch_dictMatchState_selectMLS ( - ZSTD_matchState_t* ms, - const BYTE* ip, const BYTE* const iLimit, - size_t* offsetPtr) -{ - switch(ms->cParams.minMatch) - { - default : /* includes case 3 */ - case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_dictMatchState); - case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_dictMatchState); - case 7 : - case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_dictMatchState); - } -} - - -FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_extDict_selectMLS ( - ZSTD_matchState_t* ms, - const BYTE* ip, const BYTE* const iLimit, - size_t* offsetPtr) -{ - switch(ms->cParams.minMatch) - { - default : /* includes case 3 */ - case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_extDict); - case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_extDict); - case 7 : - case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_extDict); - } -} - - -/* ******************************* -* Common parser - lazy strategy -*********************************/ -typedef enum { search_hashChain, search_binaryTree } searchMethod_e; - -FORCE_INLINE_TEMPLATE size_t -ZSTD_compressBlock_lazy_generic( - ZSTD_matchState_t* ms, seqStore_t* seqStore, - U32 rep[ZSTD_REP_NUM], - const void* src, size_t srcSize, - const searchMethod_e searchMethod, const U32 depth, - ZSTD_dictMode_e const dictMode) -{ - const BYTE* const istart = (const BYTE*)src; - const BYTE* ip = istart; - const BYTE* anchor = istart; - const BYTE* const iend = istart + srcSize; - const BYTE* const ilimit = iend - 8; - const BYTE* const base = ms->window.base; - const U32 prefixLowestIndex = ms->window.dictLimit; - const BYTE* const prefixLowest = base + prefixLowestIndex; - - typedef size_t (*searchMax_f)( - ZSTD_matchState_t* ms, - const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr); - searchMax_f const searchMax = dictMode == ZSTD_dictMatchState ? - (searchMethod==search_binaryTree ? ZSTD_BtFindBestMatch_dictMatchState_selectMLS - : ZSTD_HcFindBestMatch_dictMatchState_selectMLS) : - (searchMethod==search_binaryTree ? ZSTD_BtFindBestMatch_selectMLS - : ZSTD_HcFindBestMatch_selectMLS); - U32 offset_1 = rep[0], offset_2 = rep[1], savedOffset=0; - - const ZSTD_matchState_t* const dms = ms->dictMatchState; - const U32 dictLowestIndex = dictMode == ZSTD_dictMatchState ? - dms->window.dictLimit : 0; - const BYTE* const dictBase = dictMode == ZSTD_dictMatchState ? - dms->window.base : NULL; - const BYTE* const dictLowest = dictMode == ZSTD_dictMatchState ? - dictBase + dictLowestIndex : NULL; - const BYTE* const dictEnd = dictMode == ZSTD_dictMatchState ? - dms->window.nextSrc : NULL; - const U32 dictIndexDelta = dictMode == ZSTD_dictMatchState ? - prefixLowestIndex - (U32)(dictEnd - dictBase) : - 0; - const U32 dictAndPrefixLength = (U32)((ip - prefixLowest) + (dictEnd - dictLowest)); - - DEBUGLOG(5, "ZSTD_compressBlock_lazy_generic (dictMode=%u)", (U32)dictMode); - - /* init */ - ip += (dictAndPrefixLength == 0); - if (dictMode == ZSTD_noDict) { - U32 const current = (U32)(ip - base); - U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, current, ms->cParams.windowLog); - U32 const maxRep = current - windowLow; - if (offset_2 > maxRep) savedOffset = offset_2, offset_2 = 0; - if (offset_1 > maxRep) savedOffset = offset_1, offset_1 = 0; - } - if (dictMode == ZSTD_dictMatchState) { - /* dictMatchState repCode checks don't currently handle repCode == 0 - * disabling. */ - assert(offset_1 <= dictAndPrefixLength); - assert(offset_2 <= dictAndPrefixLength); - } - - /* Match Loop */ -#if defined(__GNUC__) && defined(__x86_64__) - /* I've measured random a 5% speed loss on levels 5 & 6 (greedy) when the - * code alignment is perturbed. To fix the instability align the loop on 32-bytes. - */ - __asm__(".p2align 5"); -#endif - while (ip < ilimit) { - size_t matchLength=0; - size_t offset=0; - const BYTE* start=ip+1; - - /* check repCode */ - if (dictMode == ZSTD_dictMatchState) { - const U32 repIndex = (U32)(ip - base) + 1 - offset_1; - const BYTE* repMatch = (dictMode == ZSTD_dictMatchState - && repIndex < prefixLowestIndex) ? - dictBase + (repIndex - dictIndexDelta) : - base + repIndex; - if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */) - && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { - const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend; - matchLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4; - if (depth==0) goto _storeSequence; - } - } - if ( dictMode == ZSTD_noDict - && ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)))) { - matchLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4; - if (depth==0) goto _storeSequence; - } - - /* first search (depth 0) */ - { size_t offsetFound = 999999999; - size_t const ml2 = searchMax(ms, ip, iend, &offsetFound); - if (ml2 > matchLength) - matchLength = ml2, start = ip, offset=offsetFound; - } - - if (matchLength < 4) { - ip += ((ip-anchor) >> kSearchStrength) + 1; /* jump faster over incompressible sections */ - continue; - } - - /* let's try to find a better solution */ - if (depth>=1) - while (ip0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) { - size_t const mlRep = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4; - int const gain2 = (int)(mlRep * 3); - int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1); - if ((mlRep >= 4) && (gain2 > gain1)) - matchLength = mlRep, offset = 0, start = ip; - } - if (dictMode == ZSTD_dictMatchState) { - const U32 repIndex = (U32)(ip - base) - offset_1; - const BYTE* repMatch = repIndex < prefixLowestIndex ? - dictBase + (repIndex - dictIndexDelta) : - base + repIndex; - if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */) - && (MEM_read32(repMatch) == MEM_read32(ip)) ) { - const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend; - size_t const mlRep = ZSTD_count_2segments(ip+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4; - int const gain2 = (int)(mlRep * 3); - int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1); - if ((mlRep >= 4) && (gain2 > gain1)) - matchLength = mlRep, offset = 0, start = ip; - } - } - { size_t offset2=999999999; - size_t const ml2 = searchMax(ms, ip, iend, &offset2); - int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */ - int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 4); - if ((ml2 >= 4) && (gain2 > gain1)) { - matchLength = ml2, offset = offset2, start = ip; - continue; /* search a better one */ - } } - - /* let's find an even better one */ - if ((depth==2) && (ip0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) { - size_t const mlRep = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4; - int const gain2 = (int)(mlRep * 4); - int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1); - if ((mlRep >= 4) && (gain2 > gain1)) - matchLength = mlRep, offset = 0, start = ip; - } - if (dictMode == ZSTD_dictMatchState) { - const U32 repIndex = (U32)(ip - base) - offset_1; - const BYTE* repMatch = repIndex < prefixLowestIndex ? - dictBase + (repIndex - dictIndexDelta) : - base + repIndex; - if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */) - && (MEM_read32(repMatch) == MEM_read32(ip)) ) { - const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend; - size_t const mlRep = ZSTD_count_2segments(ip+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4; - int const gain2 = (int)(mlRep * 4); - int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1); - if ((mlRep >= 4) && (gain2 > gain1)) - matchLength = mlRep, offset = 0, start = ip; - } - } - { size_t offset2=999999999; - size_t const ml2 = searchMax(ms, ip, iend, &offset2); - int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */ - int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 7); - if ((ml2 >= 4) && (gain2 > gain1)) { - matchLength = ml2, offset = offset2, start = ip; - continue; - } } } - break; /* nothing found : store previous solution */ - } - - /* NOTE: - * start[-offset+ZSTD_REP_MOVE-1] is undefined behavior. - * (-offset+ZSTD_REP_MOVE-1) is unsigned, and is added to start, which - * overflows the pointer, which is undefined behavior. - */ - /* catch up */ - if (offset) { - if (dictMode == ZSTD_noDict) { - while ( ((start > anchor) & (start - (offset-ZSTD_REP_MOVE) > prefixLowest)) - && (start[-1] == (start-(offset-ZSTD_REP_MOVE))[-1]) ) /* only search for offset within prefix */ - { start--; matchLength++; } - } - if (dictMode == ZSTD_dictMatchState) { - U32 const matchIndex = (U32)((start-base) - (offset - ZSTD_REP_MOVE)); - const BYTE* match = (matchIndex < prefixLowestIndex) ? dictBase + matchIndex - dictIndexDelta : base + matchIndex; - const BYTE* const mStart = (matchIndex < prefixLowestIndex) ? dictLowest : prefixLowest; - while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; } /* catch up */ - } - offset_2 = offset_1; offset_1 = (U32)(offset - ZSTD_REP_MOVE); - } - /* store sequence */ -_storeSequence: - { size_t const litLength = start - anchor; - ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offset, matchLength-MINMATCH); - anchor = ip = start + matchLength; - } - - /* check immediate repcode */ - if (dictMode == ZSTD_dictMatchState) { - while (ip <= ilimit) { - U32 const current2 = (U32)(ip-base); - U32 const repIndex = current2 - offset_2; - const BYTE* repMatch = dictMode == ZSTD_dictMatchState - && repIndex < prefixLowestIndex ? - dictBase - dictIndexDelta + repIndex : - base + repIndex; - if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex) >= 3 /* intentional overflow */) - && (MEM_read32(repMatch) == MEM_read32(ip)) ) { - const BYTE* const repEnd2 = repIndex < prefixLowestIndex ? dictEnd : iend; - matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd2, prefixLowest) + 4; - offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap offset_2 <=> offset_1 */ - ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH); - ip += matchLength; - anchor = ip; - continue; - } - break; - } - } - - if (dictMode == ZSTD_noDict) { - while ( ((ip <= ilimit) & (offset_2>0)) - && (MEM_read32(ip) == MEM_read32(ip - offset_2)) ) { - /* store sequence */ - matchLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4; - offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap repcodes */ - ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH); - ip += matchLength; - anchor = ip; - continue; /* faster when present ... (?) */ - } } } - - /* Save reps for next block */ - rep[0] = offset_1 ? offset_1 : savedOffset; - rep[1] = offset_2 ? offset_2 : savedOffset; - - /* Return the last literals size */ - return (size_t)(iend - anchor); -} - - -size_t ZSTD_compressBlock_btlazy2( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize) -{ - return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_noDict); -} - -size_t ZSTD_compressBlock_lazy2( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize) -{ - return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_noDict); -} - -size_t ZSTD_compressBlock_lazy( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize) -{ - return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_noDict); -} - -size_t ZSTD_compressBlock_greedy( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize) -{ - return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_noDict); -} - -size_t ZSTD_compressBlock_btlazy2_dictMatchState( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize) -{ - return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_dictMatchState); -} - -size_t ZSTD_compressBlock_lazy2_dictMatchState( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize) -{ - return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_dictMatchState); -} - -size_t ZSTD_compressBlock_lazy_dictMatchState( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize) -{ - return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_dictMatchState); -} - -size_t ZSTD_compressBlock_greedy_dictMatchState( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize) -{ - return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_dictMatchState); -} - - -FORCE_INLINE_TEMPLATE -size_t ZSTD_compressBlock_lazy_extDict_generic( - ZSTD_matchState_t* ms, seqStore_t* seqStore, - U32 rep[ZSTD_REP_NUM], - const void* src, size_t srcSize, - const searchMethod_e searchMethod, const U32 depth) -{ - const BYTE* const istart = (const BYTE*)src; - const BYTE* ip = istart; - const BYTE* anchor = istart; - const BYTE* const iend = istart + srcSize; - const BYTE* const ilimit = iend - 8; - const BYTE* const base = ms->window.base; - const U32 dictLimit = ms->window.dictLimit; - const BYTE* const prefixStart = base + dictLimit; - const BYTE* const dictBase = ms->window.dictBase; - const BYTE* const dictEnd = dictBase + dictLimit; - const BYTE* const dictStart = dictBase + ms->window.lowLimit; - const U32 windowLog = ms->cParams.windowLog; - - typedef size_t (*searchMax_f)( - ZSTD_matchState_t* ms, - const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr); - searchMax_f searchMax = searchMethod==search_binaryTree ? ZSTD_BtFindBestMatch_extDict_selectMLS : ZSTD_HcFindBestMatch_extDict_selectMLS; - - U32 offset_1 = rep[0], offset_2 = rep[1]; - - DEBUGLOG(5, "ZSTD_compressBlock_lazy_extDict_generic"); - - /* init */ - ip += (ip == prefixStart); - - /* Match Loop */ -#if defined(__GNUC__) && defined(__x86_64__) - /* I've measured random a 5% speed loss on levels 5 & 6 (greedy) when the - * code alignment is perturbed. To fix the instability align the loop on 32-bytes. - */ - __asm__(".p2align 5"); -#endif - while (ip < ilimit) { - size_t matchLength=0; - size_t offset=0; - const BYTE* start=ip+1; - U32 current = (U32)(ip-base); - - /* check repCode */ - { const U32 windowLow = ZSTD_getLowestMatchIndex(ms, current+1, windowLog); - const U32 repIndex = (U32)(current+1 - offset_1); - const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; - const BYTE* const repMatch = repBase + repIndex; - if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow)) /* intentional overflow */ - if (MEM_read32(ip+1) == MEM_read32(repMatch)) { - /* repcode detected we should take it */ - const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; - matchLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repEnd, prefixStart) + 4; - if (depth==0) goto _storeSequence; - } } - - /* first search (depth 0) */ - { size_t offsetFound = 999999999; - size_t const ml2 = searchMax(ms, ip, iend, &offsetFound); - if (ml2 > matchLength) - matchLength = ml2, start = ip, offset=offsetFound; - } - - if (matchLength < 4) { - ip += ((ip-anchor) >> kSearchStrength) + 1; /* jump faster over incompressible sections */ - continue; - } - - /* let's try to find a better solution */ - if (depth>=1) - while (ip= 3) & (repIndex > windowLow)) /* intentional overflow */ - if (MEM_read32(ip) == MEM_read32(repMatch)) { - /* repcode detected */ - const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; - size_t const repLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4; - int const gain2 = (int)(repLength * 3); - int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1); - if ((repLength >= 4) && (gain2 > gain1)) - matchLength = repLength, offset = 0, start = ip; - } } - - /* search match, depth 1 */ - { size_t offset2=999999999; - size_t const ml2 = searchMax(ms, ip, iend, &offset2); - int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */ - int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 4); - if ((ml2 >= 4) && (gain2 > gain1)) { - matchLength = ml2, offset = offset2, start = ip; - continue; /* search a better one */ - } } - - /* let's find an even better one */ - if ((depth==2) && (ip= 3) & (repIndex > windowLow)) /* intentional overflow */ - if (MEM_read32(ip) == MEM_read32(repMatch)) { - /* repcode detected */ - const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; - size_t const repLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4; - int const gain2 = (int)(repLength * 4); - int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1); - if ((repLength >= 4) && (gain2 > gain1)) - matchLength = repLength, offset = 0, start = ip; - } } - - /* search match, depth 2 */ - { size_t offset2=999999999; - size_t const ml2 = searchMax(ms, ip, iend, &offset2); - int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */ - int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 7); - if ((ml2 >= 4) && (gain2 > gain1)) { - matchLength = ml2, offset = offset2, start = ip; - continue; - } } } - break; /* nothing found : store previous solution */ - } - - /* catch up */ - if (offset) { - U32 const matchIndex = (U32)((start-base) - (offset - ZSTD_REP_MOVE)); - const BYTE* match = (matchIndex < dictLimit) ? dictBase + matchIndex : base + matchIndex; - const BYTE* const mStart = (matchIndex < dictLimit) ? dictStart : prefixStart; - while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; } /* catch up */ - offset_2 = offset_1; offset_1 = (U32)(offset - ZSTD_REP_MOVE); - } - - /* store sequence */ -_storeSequence: - { size_t const litLength = start - anchor; - ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offset, matchLength-MINMATCH); - anchor = ip = start + matchLength; - } - - /* check immediate repcode */ - while (ip <= ilimit) { - const U32 repCurrent = (U32)(ip-base); - const U32 windowLow = ZSTD_getLowestMatchIndex(ms, repCurrent, windowLog); - const U32 repIndex = repCurrent - offset_2; - const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; - const BYTE* const repMatch = repBase + repIndex; - if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow)) /* intentional overflow */ - if (MEM_read32(ip) == MEM_read32(repMatch)) { - /* repcode detected we should take it */ - const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; - matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4; - offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap offset history */ - ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH); - ip += matchLength; - anchor = ip; - continue; /* faster when present ... (?) */ - } - break; - } } - - /* Save reps for next block */ - rep[0] = offset_1; - rep[1] = offset_2; - - /* Return the last literals size */ - return (size_t)(iend - anchor); -} - - -size_t ZSTD_compressBlock_greedy_extDict( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize) -{ - return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0); -} - -size_t ZSTD_compressBlock_lazy_extDict( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize) - -{ - return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1); -} - -size_t ZSTD_compressBlock_lazy2_extDict( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize) - -{ - return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2); -} - -size_t ZSTD_compressBlock_btlazy2_extDict( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize) - -{ - return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2); -} - -} - - -// LICENSE_CHANGE_END - - -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #4 -// See the end of this file for a list - -/* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. - */ - - - - - /* ZSTD_fillHashTable() */ - /* ZSTD_fillDoubleHashTable() */ - -#define LDM_BUCKET_SIZE_LOG 3 -#define LDM_MIN_MATCH_LENGTH 64 -#define LDM_HASH_RLOG 7 -#define LDM_HASH_CHAR_OFFSET 10 - -namespace duckdb_zstd { - -void ZSTD_ldm_adjustParameters(ldmParams_t* params, - ZSTD_compressionParameters const* cParams) -{ - params->windowLog = cParams->windowLog; - ZSTD_STATIC_ASSERT(LDM_BUCKET_SIZE_LOG <= ZSTD_LDM_BUCKETSIZELOG_MAX); - DEBUGLOG(4, "ZSTD_ldm_adjustParameters"); - if (!params->bucketSizeLog) params->bucketSizeLog = LDM_BUCKET_SIZE_LOG; - if (!params->minMatchLength) params->minMatchLength = LDM_MIN_MATCH_LENGTH; - if (cParams->strategy >= ZSTD_btopt) { - /* Get out of the way of the optimal parser */ - U32 const minMatch = MAX(cParams->targetLength, params->minMatchLength); - assert(minMatch >= ZSTD_LDM_MINMATCH_MIN); - assert(minMatch <= ZSTD_LDM_MINMATCH_MAX); - params->minMatchLength = minMatch; - } - if (params->hashLog == 0) { - params->hashLog = MAX(ZSTD_HASHLOG_MIN, params->windowLog - LDM_HASH_RLOG); - assert(params->hashLog <= ZSTD_HASHLOG_MAX); - } - if (params->hashRateLog == 0) { - params->hashRateLog = params->windowLog < params->hashLog - ? 0 - : params->windowLog - params->hashLog; - } - params->bucketSizeLog = MIN(params->bucketSizeLog, params->hashLog); -} - -size_t ZSTD_ldm_getTableSize(ldmParams_t params) -{ - size_t const ldmHSize = ((size_t)1) << params.hashLog; - size_t const ldmBucketSizeLog = MIN(params.bucketSizeLog, params.hashLog); - size_t const ldmBucketSize = ((size_t)1) << (params.hashLog - ldmBucketSizeLog); - size_t const totalSize = ZSTD_cwksp_alloc_size(ldmBucketSize) - + ZSTD_cwksp_alloc_size(ldmHSize * sizeof(ldmEntry_t)); - return params.enableLdm ? totalSize : 0; -} - -size_t ZSTD_ldm_getMaxNbSeq(ldmParams_t params, size_t maxChunkSize) -{ - return params.enableLdm ? (maxChunkSize / params.minMatchLength) : 0; -} - -/** ZSTD_ldm_getSmallHash() : - * numBits should be <= 32 - * If numBits==0, returns 0. - * @return : the most significant numBits of value. */ -static U32 ZSTD_ldm_getSmallHash(U64 value, U32 numBits) -{ - assert(numBits <= 32); - return numBits == 0 ? 0 : (U32)(value >> (64 - numBits)); -} - -/** ZSTD_ldm_getChecksum() : - * numBitsToDiscard should be <= 32 - * @return : the next most significant 32 bits after numBitsToDiscard */ -static U32 ZSTD_ldm_getChecksum(U64 hash, U32 numBitsToDiscard) -{ - assert(numBitsToDiscard <= 32); - return (hash >> (64 - 32 - numBitsToDiscard)) & 0xFFFFFFFF; -} - -/** ZSTD_ldm_getTag() ; - * Given the hash, returns the most significant numTagBits bits - * after (32 + hbits) bits. - * - * If there are not enough bits remaining, return the last - * numTagBits bits. */ -static U32 ZSTD_ldm_getTag(U64 hash, U32 hbits, U32 numTagBits) -{ - assert(numTagBits < 32 && hbits <= 32); - if (32 - hbits < numTagBits) { - return hash & (((U32)1 << numTagBits) - 1); - } else { - return (hash >> (32 - hbits - numTagBits)) & (((U32)1 << numTagBits) - 1); - } -} - -/** ZSTD_ldm_getBucket() : - * Returns a pointer to the start of the bucket associated with hash. */ -static ldmEntry_t* ZSTD_ldm_getBucket( - ldmState_t* ldmState, size_t hash, ldmParams_t const ldmParams) -{ - return ldmState->hashTable + (hash << ldmParams.bucketSizeLog); -} - -/** ZSTD_ldm_insertEntry() : - * Insert the entry with corresponding hash into the hash table */ -static void ZSTD_ldm_insertEntry(ldmState_t* ldmState, - size_t const hash, const ldmEntry_t entry, - ldmParams_t const ldmParams) -{ - BYTE* const bucketOffsets = ldmState->bucketOffsets; - *(ZSTD_ldm_getBucket(ldmState, hash, ldmParams) + bucketOffsets[hash]) = entry; - bucketOffsets[hash]++; - bucketOffsets[hash] &= ((U32)1 << ldmParams.bucketSizeLog) - 1; -} - -/** ZSTD_ldm_makeEntryAndInsertByTag() : - * - * Gets the small hash, checksum, and tag from the rollingHash. - * - * If the tag matches (1 << ldmParams.hashRateLog)-1, then - * creates an ldmEntry from the offset, and inserts it into the hash table. - * - * hBits is the length of the small hash, which is the most significant hBits - * of rollingHash. The checksum is the next 32 most significant bits, followed - * by ldmParams.hashRateLog bits that make up the tag. */ -static void ZSTD_ldm_makeEntryAndInsertByTag(ldmState_t* ldmState, - U64 const rollingHash, - U32 const hBits, - U32 const offset, - ldmParams_t const ldmParams) -{ - U32 const tag = ZSTD_ldm_getTag(rollingHash, hBits, ldmParams.hashRateLog); - U32 const tagMask = ((U32)1 << ldmParams.hashRateLog) - 1; - if (tag == tagMask) { - U32 const hash = ZSTD_ldm_getSmallHash(rollingHash, hBits); - U32 const checksum = ZSTD_ldm_getChecksum(rollingHash, hBits); - ldmEntry_t entry; - entry.offset = offset; - entry.checksum = checksum; - ZSTD_ldm_insertEntry(ldmState, hash, entry, ldmParams); - } -} - -/** ZSTD_ldm_countBackwardsMatch() : - * Returns the number of bytes that match backwards before pIn and pMatch. - * - * We count only bytes where pMatch >= pBase and pIn >= pAnchor. */ -static size_t ZSTD_ldm_countBackwardsMatch( - const BYTE* pIn, const BYTE* pAnchor, - const BYTE* pMatch, const BYTE* pBase) -{ - size_t matchLength = 0; - while (pIn > pAnchor && pMatch > pBase && pIn[-1] == pMatch[-1]) { - pIn--; - pMatch--; - matchLength++; - } - return matchLength; -} - -/** ZSTD_ldm_fillFastTables() : - * - * Fills the relevant tables for the ZSTD_fast and ZSTD_dfast strategies. - * This is similar to ZSTD_loadDictionaryContent. - * - * The tables for the other strategies are filled within their - * block compressors. */ -static size_t ZSTD_ldm_fillFastTables(ZSTD_matchState_t* ms, - void const* end) -{ - const BYTE* const iend = (const BYTE*)end; - - switch(ms->cParams.strategy) - { - case ZSTD_fast: - ZSTD_fillHashTable(ms, iend, ZSTD_dtlm_fast); - break; - - case ZSTD_dfast: - ZSTD_fillDoubleHashTable(ms, iend, ZSTD_dtlm_fast); - break; - - case ZSTD_greedy: - case ZSTD_lazy: - case ZSTD_lazy2: - case ZSTD_btlazy2: - case ZSTD_btopt: - case ZSTD_btultra: - case ZSTD_btultra2: - break; - default: - assert(0); /* not possible : not a valid strategy id */ - } - - return 0; -} - -/** ZSTD_ldm_fillLdmHashTable() : - * - * Fills hashTable from (lastHashed + 1) to iend (non-inclusive). - * lastHash is the rolling hash that corresponds to lastHashed. - * - * Returns the rolling hash corresponding to position iend-1. */ -static U64 ZSTD_ldm_fillLdmHashTable(ldmState_t* state, - U64 lastHash, const BYTE* lastHashed, - const BYTE* iend, const BYTE* base, - U32 hBits, ldmParams_t const ldmParams) -{ - U64 rollingHash = lastHash; - const BYTE* cur = lastHashed + 1; - - while (cur < iend) { - rollingHash = ZSTD_rollingHash_rotate(rollingHash, cur[-1], - cur[ldmParams.minMatchLength-1], - state->hashPower); - ZSTD_ldm_makeEntryAndInsertByTag(state, - rollingHash, hBits, - (U32)(cur - base), ldmParams); - ++cur; - } - return rollingHash; -} - -void ZSTD_ldm_fillHashTable( - ldmState_t* state, const BYTE* ip, - const BYTE* iend, ldmParams_t const* params) -{ - DEBUGLOG(5, "ZSTD_ldm_fillHashTable"); - if ((size_t)(iend - ip) >= params->minMatchLength) { - U64 startingHash = ZSTD_rollingHash_compute(ip, params->minMatchLength); - ZSTD_ldm_fillLdmHashTable( - state, startingHash, ip, iend - params->minMatchLength, state->window.base, - params->hashLog - params->bucketSizeLog, - *params); - } -} - - -/** ZSTD_ldm_limitTableUpdate() : - * - * Sets cctx->nextToUpdate to a position corresponding closer to anchor - * if it is far way - * (after a long match, only update tables a limited amount). */ -static void ZSTD_ldm_limitTableUpdate(ZSTD_matchState_t* ms, const BYTE* anchor) -{ - U32 const current = (U32)(anchor - ms->window.base); - if (current > ms->nextToUpdate + 1024) { - ms->nextToUpdate = - current - MIN(512, current - ms->nextToUpdate - 1024); - } -} - -static size_t ZSTD_ldm_generateSequences_internal( - ldmState_t* ldmState, rawSeqStore_t* rawSeqStore, - ldmParams_t const* params, void const* src, size_t srcSize) -{ - /* LDM parameters */ - int const extDict = ZSTD_window_hasExtDict(ldmState->window); - U32 const minMatchLength = params->minMatchLength; - U64 const hashPower = ldmState->hashPower; - U32 const hBits = params->hashLog - params->bucketSizeLog; - U32 const ldmBucketSize = 1U << params->bucketSizeLog; - U32 const hashRateLog = params->hashRateLog; - U32 const ldmTagMask = (1U << params->hashRateLog) - 1; - /* Prefix and extDict parameters */ - U32 const dictLimit = ldmState->window.dictLimit; - U32 const lowestIndex = extDict ? ldmState->window.lowLimit : dictLimit; - BYTE const* const base = ldmState->window.base; - BYTE const* const dictBase = extDict ? ldmState->window.dictBase : NULL; - BYTE const* const dictStart = extDict ? dictBase + lowestIndex : NULL; - BYTE const* const dictEnd = extDict ? dictBase + dictLimit : NULL; - BYTE const* const lowPrefixPtr = base + dictLimit; - /* Input bounds */ - BYTE const* const istart = (BYTE const*)src; - BYTE const* const iend = istart + srcSize; - BYTE const* const ilimit = iend - MAX(minMatchLength, HASH_READ_SIZE); - /* Input positions */ - BYTE const* anchor = istart; - BYTE const* ip = istart; - /* Rolling hash */ - BYTE const* lastHashed = NULL; - U64 rollingHash = 0; - - while (ip <= ilimit) { - size_t mLength; - U32 const current = (U32)(ip - base); - size_t forwardMatchLength = 0, backwardMatchLength = 0; - ldmEntry_t* bestEntry = NULL; - if (ip != istart) { - rollingHash = ZSTD_rollingHash_rotate(rollingHash, lastHashed[0], - lastHashed[minMatchLength], - hashPower); - } else { - rollingHash = ZSTD_rollingHash_compute(ip, minMatchLength); - } - lastHashed = ip; - - /* Do not insert and do not look for a match */ - if (ZSTD_ldm_getTag(rollingHash, hBits, hashRateLog) != ldmTagMask) { - ip++; - continue; - } - - /* Get the best entry and compute the match lengths */ - { - ldmEntry_t* const bucket = - ZSTD_ldm_getBucket(ldmState, - ZSTD_ldm_getSmallHash(rollingHash, hBits), - *params); - ldmEntry_t* cur; - size_t bestMatchLength = 0; - U32 const checksum = ZSTD_ldm_getChecksum(rollingHash, hBits); - - for (cur = bucket; cur < bucket + ldmBucketSize; ++cur) { - size_t curForwardMatchLength, curBackwardMatchLength, - curTotalMatchLength; - if (cur->checksum != checksum || cur->offset <= lowestIndex) { - continue; - } - if (extDict) { - BYTE const* const curMatchBase = - cur->offset < dictLimit ? dictBase : base; - BYTE const* const pMatch = curMatchBase + cur->offset; - BYTE const* const matchEnd = - cur->offset < dictLimit ? dictEnd : iend; - BYTE const* const lowMatchPtr = - cur->offset < dictLimit ? dictStart : lowPrefixPtr; - - curForwardMatchLength = ZSTD_count_2segments( - ip, pMatch, iend, - matchEnd, lowPrefixPtr); - if (curForwardMatchLength < minMatchLength) { - continue; - } - curBackwardMatchLength = - ZSTD_ldm_countBackwardsMatch(ip, anchor, pMatch, - lowMatchPtr); - curTotalMatchLength = curForwardMatchLength + - curBackwardMatchLength; - } else { /* !extDict */ - BYTE const* const pMatch = base + cur->offset; - curForwardMatchLength = ZSTD_count(ip, pMatch, iend); - if (curForwardMatchLength < minMatchLength) { - continue; - } - curBackwardMatchLength = - ZSTD_ldm_countBackwardsMatch(ip, anchor, pMatch, - lowPrefixPtr); - curTotalMatchLength = curForwardMatchLength + - curBackwardMatchLength; - } - - if (curTotalMatchLength > bestMatchLength) { - bestMatchLength = curTotalMatchLength; - forwardMatchLength = curForwardMatchLength; - backwardMatchLength = curBackwardMatchLength; - bestEntry = cur; - } - } - } - - /* No match found -- continue searching */ - if (bestEntry == NULL) { - ZSTD_ldm_makeEntryAndInsertByTag(ldmState, rollingHash, - hBits, current, - *params); - ip++; - continue; - } - - /* Match found */ - mLength = forwardMatchLength + backwardMatchLength; - ip -= backwardMatchLength; - - { - /* Store the sequence: - * ip = current - backwardMatchLength - * The match is at (bestEntry->offset - backwardMatchLength) - */ - U32 const matchIndex = bestEntry->offset; - U32 const offset = current - matchIndex; - rawSeq* const seq = rawSeqStore->seq + rawSeqStore->size; - - /* Out of sequence storage */ - if (rawSeqStore->size == rawSeqStore->capacity) - return ERROR(dstSize_tooSmall); - seq->litLength = (U32)(ip - anchor); - seq->matchLength = (U32)mLength; - seq->offset = offset; - rawSeqStore->size++; - } - - /* Insert the current entry into the hash table */ - ZSTD_ldm_makeEntryAndInsertByTag(ldmState, rollingHash, hBits, - (U32)(lastHashed - base), - *params); - - assert(ip + backwardMatchLength == lastHashed); - - /* Fill the hash table from lastHashed+1 to ip+mLength*/ - /* Heuristic: don't need to fill the entire table at end of block */ - if (ip + mLength <= ilimit) { - rollingHash = ZSTD_ldm_fillLdmHashTable( - ldmState, rollingHash, lastHashed, - ip + mLength, base, hBits, *params); - lastHashed = ip + mLength - 1; - } - ip += mLength; - anchor = ip; - } - return iend - anchor; -} - -/*! ZSTD_ldm_reduceTable() : - * reduce table indexes by `reducerValue` */ -static void ZSTD_ldm_reduceTable(ldmEntry_t* const table, U32 const size, - U32 const reducerValue) -{ - U32 u; - for (u = 0; u < size; u++) { - if (table[u].offset < reducerValue) table[u].offset = 0; - else table[u].offset -= reducerValue; - } -} - -size_t ZSTD_ldm_generateSequences( - ldmState_t* ldmState, rawSeqStore_t* sequences, - ldmParams_t const* params, void const* src, size_t srcSize) -{ - U32 const maxDist = 1U << params->windowLog; - BYTE const* const istart = (BYTE const*)src; - BYTE const* const iend = istart + srcSize; - size_t const kMaxChunkSize = 1 << 20; - size_t const nbChunks = (srcSize / kMaxChunkSize) + ((srcSize % kMaxChunkSize) != 0); - size_t chunk; - size_t leftoverSize = 0; - - assert(ZSTD_CHUNKSIZE_MAX >= kMaxChunkSize); - /* Check that ZSTD_window_update() has been called for this chunk prior - * to passing it to this function. - */ - assert(ldmState->window.nextSrc >= (BYTE const*)src + srcSize); - /* The input could be very large (in zstdmt), so it must be broken up into - * chunks to enforce the maximum distance and handle overflow correction. - */ - assert(sequences->pos <= sequences->size); - assert(sequences->size <= sequences->capacity); - for (chunk = 0; chunk < nbChunks && sequences->size < sequences->capacity; ++chunk) { - BYTE const* const chunkStart = istart + chunk * kMaxChunkSize; - size_t const remaining = (size_t)(iend - chunkStart); - BYTE const *const chunkEnd = - (remaining < kMaxChunkSize) ? iend : chunkStart + kMaxChunkSize; - size_t const chunkSize = chunkEnd - chunkStart; - size_t newLeftoverSize; - size_t const prevSize = sequences->size; - - assert(chunkStart < iend); - /* 1. Perform overflow correction if necessary. */ - if (ZSTD_window_needOverflowCorrection(ldmState->window, chunkEnd)) { - U32 const ldmHSize = 1U << params->hashLog; - U32 const correction = ZSTD_window_correctOverflow( - &ldmState->window, /* cycleLog */ 0, maxDist, chunkStart); - ZSTD_ldm_reduceTable(ldmState->hashTable, ldmHSize, correction); - /* invalidate dictionaries on overflow correction */ - ldmState->loadedDictEnd = 0; - } - /* 2. We enforce the maximum offset allowed. - * - * kMaxChunkSize should be small enough that we don't lose too much of - * the window through early invalidation. - * TODO: * Test the chunk size. - * * Try invalidation after the sequence generation and test the - * the offset against maxDist directly. - * - * NOTE: Because of dictionaries + sequence splitting we MUST make sure - * that any offset used is valid at the END of the sequence, since it may - * be split into two sequences. This condition holds when using - * ZSTD_window_enforceMaxDist(), but if we move to checking offsets - * against maxDist directly, we'll have to carefully handle that case. - */ - ZSTD_window_enforceMaxDist(&ldmState->window, chunkEnd, maxDist, &ldmState->loadedDictEnd, NULL); - /* 3. Generate the sequences for the chunk, and get newLeftoverSize. */ - newLeftoverSize = ZSTD_ldm_generateSequences_internal( - ldmState, sequences, params, chunkStart, chunkSize); - if (ZSTD_isError(newLeftoverSize)) - return newLeftoverSize; - /* 4. We add the leftover literals from previous iterations to the first - * newly generated sequence, or add the `newLeftoverSize` if none are - * generated. - */ - /* Prepend the leftover literals from the last call */ - if (prevSize < sequences->size) { - sequences->seq[prevSize].litLength += (U32)leftoverSize; - leftoverSize = newLeftoverSize; - } else { - assert(newLeftoverSize == chunkSize); - leftoverSize += chunkSize; - } - } - return 0; -} - -void ZSTD_ldm_skipSequences(rawSeqStore_t* rawSeqStore, size_t srcSize, U32 const minMatch) { - while (srcSize > 0 && rawSeqStore->pos < rawSeqStore->size) { - rawSeq* seq = rawSeqStore->seq + rawSeqStore->pos; - if (srcSize <= seq->litLength) { - /* Skip past srcSize literals */ - seq->litLength -= (U32)srcSize; - return; - } - srcSize -= seq->litLength; - seq->litLength = 0; - if (srcSize < seq->matchLength) { - /* Skip past the first srcSize of the match */ - seq->matchLength -= (U32)srcSize; - if (seq->matchLength < minMatch) { - /* The match is too short, omit it */ - if (rawSeqStore->pos + 1 < rawSeqStore->size) { - seq[1].litLength += seq[0].matchLength; - } - rawSeqStore->pos++; - } - return; - } - srcSize -= seq->matchLength; - seq->matchLength = 0; - rawSeqStore->pos++; - } -} - -/** - * If the sequence length is longer than remaining then the sequence is split - * between this block and the next. - * - * Returns the current sequence to handle, or if the rest of the block should - * be literals, it returns a sequence with offset == 0. - */ -static rawSeq maybeSplitSequence(rawSeqStore_t* rawSeqStore, - U32 const remaining, U32 const minMatch) -{ - rawSeq sequence = rawSeqStore->seq[rawSeqStore->pos]; - assert(sequence.offset > 0); - /* Likely: No partial sequence */ - if (remaining >= sequence.litLength + sequence.matchLength) { - rawSeqStore->pos++; - return sequence; - } - /* Cut the sequence short (offset == 0 ==> rest is literals). */ - if (remaining <= sequence.litLength) { - sequence.offset = 0; - } else if (remaining < sequence.litLength + sequence.matchLength) { - sequence.matchLength = remaining - sequence.litLength; - if (sequence.matchLength < minMatch) { - sequence.offset = 0; - } - } - /* Skip past `remaining` bytes for the future sequences. */ - ZSTD_ldm_skipSequences(rawSeqStore, remaining, minMatch); - return sequence; -} - -size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore, - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize) -{ - const ZSTD_compressionParameters* const cParams = &ms->cParams; - unsigned const minMatch = cParams->minMatch; - ZSTD_blockCompressor const blockCompressor = - ZSTD_selectBlockCompressor(cParams->strategy, ZSTD_matchState_dictMode(ms)); - /* Input bounds */ - BYTE const* const istart = (BYTE const*)src; - BYTE const* const iend = istart + srcSize; - /* Input positions */ - BYTE const* ip = istart; - - DEBUGLOG(5, "ZSTD_ldm_blockCompress: srcSize=%zu", srcSize); - assert(rawSeqStore->pos <= rawSeqStore->size); - assert(rawSeqStore->size <= rawSeqStore->capacity); - /* Loop through each sequence and apply the block compressor to the lits */ - while (rawSeqStore->pos < rawSeqStore->size && ip < iend) { - /* maybeSplitSequence updates rawSeqStore->pos */ - rawSeq const sequence = maybeSplitSequence(rawSeqStore, - (U32)(iend - ip), minMatch); - int i; - /* End signal */ - if (sequence.offset == 0) - break; - - assert(ip + sequence.litLength + sequence.matchLength <= iend); - - /* Fill tables for block compressor */ - ZSTD_ldm_limitTableUpdate(ms, ip); - ZSTD_ldm_fillFastTables(ms, ip); - /* Run the block compressor */ - DEBUGLOG(5, "pos %u : calling block compressor on segment of size %u", (unsigned)(ip-istart), sequence.litLength); - { - size_t const newLitLength = - blockCompressor(ms, seqStore, rep, ip, sequence.litLength); - ip += sequence.litLength; - /* Update the repcodes */ - for (i = ZSTD_REP_NUM - 1; i > 0; i--) - rep[i] = rep[i-1]; - rep[0] = sequence.offset; - /* Store the sequence */ - ZSTD_storeSeq(seqStore, newLitLength, ip - newLitLength, iend, - sequence.offset + ZSTD_REP_MOVE, - sequence.matchLength - MINMATCH); - ip += sequence.matchLength; - } - } - /* Fill the tables for the block compressor */ - ZSTD_ldm_limitTableUpdate(ms, ip); - ZSTD_ldm_fillFastTables(ms, ip); - /* Compress the last literals */ - return blockCompressor(ms, seqStore, rep, ip, iend - ip); -} - -} - - -// LICENSE_CHANGE_END - - -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #4 -// See the end of this file for a list - -/* - * Copyright (c) 2016-2020, Przemyslaw Skibinski, Yann Collet, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. - */ - - - - - - -#define ZSTD_LITFREQ_ADD 2 /* scaling factor for litFreq, so that frequencies adapt faster to new stats */ -#define ZSTD_FREQ_DIV 4 /* log factor when using previous stats to init next stats */ -#define ZSTD_MAX_PRICE (1<<30) - -#define ZSTD_PREDEF_THRESHOLD 1024 /* if srcSize < ZSTD_PREDEF_THRESHOLD, symbols' cost is assumed static, directly determined by pre-defined distributions */ - - -/*-************************************* -* Price functions for optimal parser -***************************************/ - -#if 0 /* approximation at bit level */ -# define BITCOST_ACCURACY 0 -# define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY) -# define WEIGHT(stat) ((void)opt, ZSTD_bitWeight(stat)) -#elif 0 /* fractional bit accuracy */ -# define BITCOST_ACCURACY 8 -# define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY) -# define WEIGHT(stat,opt) ((void)opt, ZSTD_fracWeight(stat)) -#else /* opt==approx, ultra==accurate */ -# define BITCOST_ACCURACY 8 -# define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY) -# define WEIGHT(stat,opt) (opt ? ZSTD_fracWeight(stat) : ZSTD_bitWeight(stat)) -#endif - -namespace duckdb_zstd { - -MEM_STATIC U32 ZSTD_bitWeight(U32 stat) -{ - return (ZSTD_highbit32(stat+1) * BITCOST_MULTIPLIER); -} - -MEM_STATIC U32 ZSTD_fracWeight(U32 rawStat) -{ - U32 const stat = rawStat + 1; - U32 const hb = ZSTD_highbit32(stat); - U32 const BWeight = hb * BITCOST_MULTIPLIER; - U32 const FWeight = (stat << BITCOST_ACCURACY) >> hb; - U32 const weight = BWeight + FWeight; - assert(hb + BITCOST_ACCURACY < 31); - return weight; -} - -#if (DEBUGLEVEL>=2) -/* debugging function, - * @return price in bytes as fractional value - * for debug messages only */ -MEM_STATIC double ZSTD_fCost(U32 price) -{ - return (double)price / (BITCOST_MULTIPLIER*8); -} -#endif - -static int ZSTD_compressedLiterals(optState_t const* const optPtr) -{ - return optPtr->literalCompressionMode != ZSTD_lcm_uncompressed; -} - -static void ZSTD_setBasePrices(optState_t* optPtr, int optLevel) -{ - if (ZSTD_compressedLiterals(optPtr)) - optPtr->litSumBasePrice = WEIGHT(optPtr->litSum, optLevel); - optPtr->litLengthSumBasePrice = WEIGHT(optPtr->litLengthSum, optLevel); - optPtr->matchLengthSumBasePrice = WEIGHT(optPtr->matchLengthSum, optLevel); - optPtr->offCodeSumBasePrice = WEIGHT(optPtr->offCodeSum, optLevel); -} - - -/* ZSTD_downscaleStat() : - * reduce all elements in table by a factor 2^(ZSTD_FREQ_DIV+malus) - * return the resulting sum of elements */ -static U32 ZSTD_downscaleStat(unsigned* table, U32 lastEltIndex, int malus) -{ - U32 s, sum=0; - DEBUGLOG(5, "ZSTD_downscaleStat (nbElts=%u)", (unsigned)lastEltIndex+1); - assert(ZSTD_FREQ_DIV+malus > 0 && ZSTD_FREQ_DIV+malus < 31); - for (s=0; s> (ZSTD_FREQ_DIV+malus)); - sum += table[s]; - } - return sum; -} - -/* ZSTD_rescaleFreqs() : - * if first block (detected by optPtr->litLengthSum == 0) : init statistics - * take hints from dictionary if there is one - * or init from zero, using src for literals stats, or flat 1 for match symbols - * otherwise downscale existing stats, to be used as seed for next block. - */ -static void -ZSTD_rescaleFreqs(optState_t* const optPtr, - const BYTE* const src, size_t const srcSize, - int const optLevel) -{ - int const compressedLiterals = ZSTD_compressedLiterals(optPtr); - DEBUGLOG(5, "ZSTD_rescaleFreqs (srcSize=%u)", (unsigned)srcSize); - optPtr->priceType = zop_dynamic; - - if (optPtr->litLengthSum == 0) { /* first block : init */ - if (srcSize <= ZSTD_PREDEF_THRESHOLD) { /* heuristic */ - DEBUGLOG(5, "(srcSize <= ZSTD_PREDEF_THRESHOLD) => zop_predef"); - optPtr->priceType = zop_predef; - } - - assert(optPtr->symbolCosts != NULL); - if (optPtr->symbolCosts->huf.repeatMode == HUF_repeat_valid) { - /* huffman table presumed generated by dictionary */ - optPtr->priceType = zop_dynamic; - - if (compressedLiterals) { - unsigned lit; - assert(optPtr->litFreq != NULL); - optPtr->litSum = 0; - for (lit=0; lit<=MaxLit; lit++) { - U32 const scaleLog = 11; /* scale to 2K */ - U32 const bitCost = HUF_getNbBits(optPtr->symbolCosts->huf.CTable, lit); - assert(bitCost <= scaleLog); - optPtr->litFreq[lit] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/; - optPtr->litSum += optPtr->litFreq[lit]; - } } - - { unsigned ll; - FSE_CState_t llstate; - FSE_initCState(&llstate, optPtr->symbolCosts->fse.litlengthCTable); - optPtr->litLengthSum = 0; - for (ll=0; ll<=MaxLL; ll++) { - U32 const scaleLog = 10; /* scale to 1K */ - U32 const bitCost = FSE_getMaxNbBits(llstate.symbolTT, ll); - assert(bitCost < scaleLog); - optPtr->litLengthFreq[ll] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/; - optPtr->litLengthSum += optPtr->litLengthFreq[ll]; - } } - - { unsigned ml; - FSE_CState_t mlstate; - FSE_initCState(&mlstate, optPtr->symbolCosts->fse.matchlengthCTable); - optPtr->matchLengthSum = 0; - for (ml=0; ml<=MaxML; ml++) { - U32 const scaleLog = 10; - U32 const bitCost = FSE_getMaxNbBits(mlstate.symbolTT, ml); - assert(bitCost < scaleLog); - optPtr->matchLengthFreq[ml] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/; - optPtr->matchLengthSum += optPtr->matchLengthFreq[ml]; - } } - - { unsigned of; - FSE_CState_t ofstate; - FSE_initCState(&ofstate, optPtr->symbolCosts->fse.offcodeCTable); - optPtr->offCodeSum = 0; - for (of=0; of<=MaxOff; of++) { - U32 const scaleLog = 10; - U32 const bitCost = FSE_getMaxNbBits(ofstate.symbolTT, of); - assert(bitCost < scaleLog); - optPtr->offCodeFreq[of] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/; - optPtr->offCodeSum += optPtr->offCodeFreq[of]; - } } - - } else { /* not a dictionary */ - - assert(optPtr->litFreq != NULL); - if (compressedLiterals) { - unsigned lit = MaxLit; - HIST_count_simple(optPtr->litFreq, &lit, src, srcSize); /* use raw first block to init statistics */ - optPtr->litSum = ZSTD_downscaleStat(optPtr->litFreq, MaxLit, 1); - } - - { unsigned ll; - for (ll=0; ll<=MaxLL; ll++) - optPtr->litLengthFreq[ll] = 1; - } - optPtr->litLengthSum = MaxLL+1; - - { unsigned ml; - for (ml=0; ml<=MaxML; ml++) - optPtr->matchLengthFreq[ml] = 1; - } - optPtr->matchLengthSum = MaxML+1; - - { unsigned of; - for (of=0; of<=MaxOff; of++) - optPtr->offCodeFreq[of] = 1; - } - optPtr->offCodeSum = MaxOff+1; - - } - - } else { /* new block : re-use previous statistics, scaled down */ - - if (compressedLiterals) - optPtr->litSum = ZSTD_downscaleStat(optPtr->litFreq, MaxLit, 1); - optPtr->litLengthSum = ZSTD_downscaleStat(optPtr->litLengthFreq, MaxLL, 0); - optPtr->matchLengthSum = ZSTD_downscaleStat(optPtr->matchLengthFreq, MaxML, 0); - optPtr->offCodeSum = ZSTD_downscaleStat(optPtr->offCodeFreq, MaxOff, 0); - } - - ZSTD_setBasePrices(optPtr, optLevel); -} - -/* ZSTD_rawLiteralsCost() : - * price of literals (only) in specified segment (which length can be 0). - * does not include price of literalLength symbol */ -static U32 ZSTD_rawLiteralsCost(const BYTE* const literals, U32 const litLength, - const optState_t* const optPtr, - int optLevel) -{ - if (litLength == 0) return 0; - - if (!ZSTD_compressedLiterals(optPtr)) - return (litLength << 3) * BITCOST_MULTIPLIER; /* Uncompressed - 8 bytes per literal. */ - - if (optPtr->priceType == zop_predef) - return (litLength*6) * BITCOST_MULTIPLIER; /* 6 bit per literal - no statistic used */ - - /* dynamic statistics */ - { U32 price = litLength * optPtr->litSumBasePrice; - U32 u; - for (u=0; u < litLength; u++) { - assert(WEIGHT(optPtr->litFreq[literals[u]], optLevel) <= optPtr->litSumBasePrice); /* literal cost should never be negative */ - price -= WEIGHT(optPtr->litFreq[literals[u]], optLevel); - } - return price; - } -} - -/* ZSTD_litLengthPrice() : - * cost of literalLength symbol */ -static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optPtr, int optLevel) -{ - if (optPtr->priceType == zop_predef) return WEIGHT(litLength, optLevel); - - /* dynamic statistics */ - { U32 const llCode = ZSTD_LLcode(litLength); - return (ZSTDInternalConstants::LL_bits[llCode] * BITCOST_MULTIPLIER) - + optPtr->litLengthSumBasePrice - - WEIGHT(optPtr->litLengthFreq[llCode], optLevel); - } -} - -/* ZSTD_getMatchPrice() : - * Provides the cost of the match part (offset + matchLength) of a sequence - * Must be combined with ZSTD_fullLiteralsCost() to get the full cost of a sequence. - * optLevel: when <2, favors small offset for decompression speed (improved cache efficiency) */ -FORCE_INLINE_TEMPLATE U32 -ZSTD_getMatchPrice(U32 const offset, - U32 const matchLength, - const optState_t* const optPtr, - int const optLevel) -{ - U32 price; - U32 const offCode = ZSTD_highbit32(offset+1); - U32 const mlBase = matchLength - MINMATCH; - assert(matchLength >= MINMATCH); - - if (optPtr->priceType == zop_predef) /* fixed scheme, do not use statistics */ - return WEIGHT(mlBase, optLevel) + ((16 + offCode) * BITCOST_MULTIPLIER); - - /* dynamic statistics */ - price = (offCode * BITCOST_MULTIPLIER) + (optPtr->offCodeSumBasePrice - WEIGHT(optPtr->offCodeFreq[offCode], optLevel)); - if ((optLevel<2) /*static*/ && offCode >= 20) - price += (offCode-19)*2 * BITCOST_MULTIPLIER; /* handicap for long distance offsets, favor decompression speed */ - - /* match Length */ - { U32 const mlCode = ZSTD_MLcode(mlBase); - price += (ZSTDInternalConstants::ML_bits[mlCode] * BITCOST_MULTIPLIER) + (optPtr->matchLengthSumBasePrice - WEIGHT(optPtr->matchLengthFreq[mlCode], optLevel)); - } - - price += BITCOST_MULTIPLIER / 5; /* heuristic : make matches a bit more costly to favor less sequences -> faster decompression speed */ - - DEBUGLOG(8, "ZSTD_getMatchPrice(ml:%u) = %u", matchLength, price); - return price; -} - -/* ZSTD_updateStats() : - * assumption : literals + litLengtn <= iend */ -static void ZSTD_updateStats(optState_t* const optPtr, - U32 litLength, const BYTE* literals, - U32 offsetCode, U32 matchLength) -{ - /* literals */ - if (ZSTD_compressedLiterals(optPtr)) { - U32 u; - for (u=0; u < litLength; u++) - optPtr->litFreq[literals[u]] += ZSTD_LITFREQ_ADD; - optPtr->litSum += litLength*ZSTD_LITFREQ_ADD; - } - - /* literal Length */ - { U32 const llCode = ZSTD_LLcode(litLength); - optPtr->litLengthFreq[llCode]++; - optPtr->litLengthSum++; - } - - /* match offset code (0-2=>repCode; 3+=>offset+2) */ - { U32 const offCode = ZSTD_highbit32(offsetCode+1); - assert(offCode <= MaxOff); - optPtr->offCodeFreq[offCode]++; - optPtr->offCodeSum++; - } - - /* match Length */ - { U32 const mlBase = matchLength - MINMATCH; - U32 const mlCode = ZSTD_MLcode(mlBase); - optPtr->matchLengthFreq[mlCode]++; - optPtr->matchLengthSum++; - } -} - - -/* ZSTD_readMINMATCH() : - * function safe only for comparisons - * assumption : memPtr must be at least 4 bytes before end of buffer */ -MEM_STATIC U32 ZSTD_readMINMATCH(const void* memPtr, U32 length) -{ - switch (length) - { - default : - case 4 : return MEM_read32(memPtr); - case 3 : if (MEM_isLittleEndian()) - return MEM_read32(memPtr)<<8; - else - return MEM_read32(memPtr)>>8; - } -} - - -/* Update hashTable3 up to ip (excluded) - Assumption : always within prefix (i.e. not within extDict) */ -static U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_matchState_t* ms, - U32* nextToUpdate3, - const BYTE* const ip) -{ - U32* const hashTable3 = ms->hashTable3; - U32 const hashLog3 = ms->hashLog3; - const BYTE* const base = ms->window.base; - U32 idx = *nextToUpdate3; - U32 const target = (U32)(ip - base); - size_t const hash3 = ZSTD_hash3Ptr(ip, hashLog3); - assert(hashLog3 > 0); - - while(idx < target) { - hashTable3[ZSTD_hash3Ptr(base+idx, hashLog3)] = idx; - idx++; - } - - *nextToUpdate3 = target; - return hashTable3[hash3]; -} - - -/*-************************************* -* Binary Tree search -***************************************/ -/** ZSTD_insertBt1() : add one or multiple positions to tree. - * ip : assumed <= iend-8 . - * @return : nb of positions added */ -static U32 ZSTD_insertBt1( - ZSTD_matchState_t* ms, - const BYTE* const ip, const BYTE* const iend, - U32 const mls, const int extDict) -{ - const ZSTD_compressionParameters* const cParams = &ms->cParams; - U32* const hashTable = ms->hashTable; - U32 const hashLog = cParams->hashLog; - size_t const h = ZSTD_hashPtr(ip, hashLog, mls); - U32* const bt = ms->chainTable; - U32 const btLog = cParams->chainLog - 1; - U32 const btMask = (1 << btLog) - 1; - U32 matchIndex = hashTable[h]; - size_t commonLengthSmaller=0, commonLengthLarger=0; - const BYTE* const base = ms->window.base; - const BYTE* const dictBase = ms->window.dictBase; - const U32 dictLimit = ms->window.dictLimit; - const BYTE* const dictEnd = dictBase + dictLimit; - const BYTE* const prefixStart = base + dictLimit; - const BYTE* match; - const U32 current = (U32)(ip-base); - const U32 btLow = btMask >= current ? 0 : current - btMask; - U32* smallerPtr = bt + 2*(current&btMask); - U32* largerPtr = smallerPtr + 1; - U32 dummy32; /* to be nullified at the end */ - U32 const windowLow = ms->window.lowLimit; - U32 matchEndIdx = current+8+1; - size_t bestLength = 8; - U32 nbCompares = 1U << cParams->searchLog; -#ifdef ZSTD_C_PREDICT - U32 predictedSmall = *(bt + 2*((current-1)&btMask) + 0); - U32 predictedLarge = *(bt + 2*((current-1)&btMask) + 1); - predictedSmall += (predictedSmall>0); - predictedLarge += (predictedLarge>0); -#endif /* ZSTD_C_PREDICT */ - - DEBUGLOG(8, "ZSTD_insertBt1 (%u)", current); - - assert(ip <= iend-8); /* required for h calculation */ - hashTable[h] = current; /* Update Hash Table */ - - assert(windowLow > 0); - while (nbCompares-- && (matchIndex >= windowLow)) { - U32* const nextPtr = bt + 2*(matchIndex & btMask); - size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ - assert(matchIndex < current); - -#ifdef ZSTD_C_PREDICT /* note : can create issues when hlog small <= 11 */ - const U32* predictPtr = bt + 2*((matchIndex-1) & btMask); /* written this way, as bt is a roll buffer */ - if (matchIndex == predictedSmall) { - /* no need to check length, result known */ - *smallerPtr = matchIndex; - if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */ - smallerPtr = nextPtr+1; /* new "smaller" => larger of match */ - matchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */ - predictedSmall = predictPtr[1] + (predictPtr[1]>0); - continue; - } - if (matchIndex == predictedLarge) { - *largerPtr = matchIndex; - if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */ - largerPtr = nextPtr; - matchIndex = nextPtr[0]; - predictedLarge = predictPtr[0] + (predictPtr[0]>0); - continue; - } -#endif - - if (!extDict || (matchIndex+matchLength >= dictLimit)) { - assert(matchIndex+matchLength >= dictLimit); /* might be wrong if actually extDict */ - match = base + matchIndex; - matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend); - } else { - match = dictBase + matchIndex; - matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart); - if (matchIndex+matchLength >= dictLimit) - match = base + matchIndex; /* to prepare for next usage of match[matchLength] */ - } - - if (matchLength > bestLength) { - bestLength = matchLength; - if (matchLength > matchEndIdx - matchIndex) - matchEndIdx = matchIndex + (U32)matchLength; - } - - if (ip+matchLength == iend) { /* equal : no way to know if inf or sup */ - break; /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt tree */ - } - - if (match[matchLength] < ip[matchLength]) { /* necessarily within buffer */ - /* match is smaller than current */ - *smallerPtr = matchIndex; /* update smaller idx */ - commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */ - if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop searching */ - smallerPtr = nextPtr+1; /* new "candidate" => larger than match, which was smaller than target */ - matchIndex = nextPtr[1]; /* new matchIndex, larger than previous and closer to current */ - } else { - /* match is larger than current */ - *largerPtr = matchIndex; - commonLengthLarger = matchLength; - if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop searching */ - largerPtr = nextPtr; - matchIndex = nextPtr[0]; - } } - - *smallerPtr = *largerPtr = 0; - { U32 positions = 0; - if (bestLength > 384) positions = MIN(192, (U32)(bestLength - 384)); /* speed optimization */ - assert(matchEndIdx > current + 8); - return MAX(positions, matchEndIdx - (current + 8)); - } -} - -FORCE_INLINE_TEMPLATE -void ZSTD_updateTree_internal( - ZSTD_matchState_t* ms, - const BYTE* const ip, const BYTE* const iend, - const U32 mls, const ZSTD_dictMode_e dictMode) -{ - const BYTE* const base = ms->window.base; - U32 const target = (U32)(ip - base); - U32 idx = ms->nextToUpdate; - DEBUGLOG(6, "ZSTD_updateTree_internal, from %u to %u (dictMode:%u)", - idx, target, dictMode); - - while(idx < target) { - U32 const forward = ZSTD_insertBt1(ms, base+idx, iend, mls, dictMode == ZSTD_extDict); - assert(idx < (U32)(idx + forward)); - idx += forward; - } - assert((size_t)(ip - base) <= (size_t)(U32)(-1)); - assert((size_t)(iend - base) <= (size_t)(U32)(-1)); - ms->nextToUpdate = target; -} - -void ZSTD_updateTree(ZSTD_matchState_t* ms, const BYTE* ip, const BYTE* iend) { - ZSTD_updateTree_internal(ms, ip, iend, ms->cParams.minMatch, ZSTD_noDict); -} - -FORCE_INLINE_TEMPLATE -U32 ZSTD_insertBtAndGetAllMatches ( - ZSTD_match_t* matches, /* store result (found matches) in this table (presumed large enough) */ - ZSTD_matchState_t* ms, - U32* nextToUpdate3, - const BYTE* const ip, const BYTE* const iLimit, const ZSTD_dictMode_e dictMode, - const U32 rep[ZSTD_REP_NUM], - U32 const ll0, /* tells if associated literal length is 0 or not. This value must be 0 or 1 */ - const U32 lengthToBeat, - U32 const mls /* template */) -{ - const ZSTD_compressionParameters* const cParams = &ms->cParams; - U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1); - const BYTE* const base = ms->window.base; - U32 const current = (U32)(ip-base); - U32 const hashLog = cParams->hashLog; - U32 const minMatch = (mls==3) ? 3 : 4; - U32* const hashTable = ms->hashTable; - size_t const h = ZSTD_hashPtr(ip, hashLog, mls); - U32 matchIndex = hashTable[h]; - U32* const bt = ms->chainTable; - U32 const btLog = cParams->chainLog - 1; - U32 const btMask= (1U << btLog) - 1; - size_t commonLengthSmaller=0, commonLengthLarger=0; - const BYTE* const dictBase = ms->window.dictBase; - U32 const dictLimit = ms->window.dictLimit; - const BYTE* const dictEnd = dictBase + dictLimit; - const BYTE* const prefixStart = base + dictLimit; - U32 const btLow = (btMask >= current) ? 0 : current - btMask; - U32 const windowLow = ZSTD_getLowestMatchIndex(ms, current, cParams->windowLog); - U32 const matchLow = windowLow ? windowLow : 1; - U32* smallerPtr = bt + 2*(current&btMask); - U32* largerPtr = bt + 2*(current&btMask) + 1; - U32 matchEndIdx = current+8+1; /* farthest referenced position of any match => detects repetitive patterns */ - U32 dummy32; /* to be nullified at the end */ - U32 mnum = 0; - U32 nbCompares = 1U << cParams->searchLog; - - const ZSTD_matchState_t* dms = dictMode == ZSTD_dictMatchState ? ms->dictMatchState : NULL; - const ZSTD_compressionParameters* const dmsCParams = - dictMode == ZSTD_dictMatchState ? &dms->cParams : NULL; - const BYTE* const dmsBase = dictMode == ZSTD_dictMatchState ? dms->window.base : NULL; - const BYTE* const dmsEnd = dictMode == ZSTD_dictMatchState ? dms->window.nextSrc : NULL; - U32 const dmsHighLimit = dictMode == ZSTD_dictMatchState ? (U32)(dmsEnd - dmsBase) : 0; - U32 const dmsLowLimit = dictMode == ZSTD_dictMatchState ? dms->window.lowLimit : 0; - U32 const dmsIndexDelta = dictMode == ZSTD_dictMatchState ? windowLow - dmsHighLimit : 0; - U32 const dmsHashLog = dictMode == ZSTD_dictMatchState ? dmsCParams->hashLog : hashLog; - U32 const dmsBtLog = dictMode == ZSTD_dictMatchState ? dmsCParams->chainLog - 1 : btLog; - U32 const dmsBtMask = dictMode == ZSTD_dictMatchState ? (1U << dmsBtLog) - 1 : 0; - U32 const dmsBtLow = dictMode == ZSTD_dictMatchState && dmsBtMask < dmsHighLimit - dmsLowLimit ? dmsHighLimit - dmsBtMask : dmsLowLimit; - - size_t bestLength = lengthToBeat-1; - DEBUGLOG(8, "ZSTD_insertBtAndGetAllMatches: current=%u", current); - - /* check repCode */ - assert(ll0 <= 1); /* necessarily 1 or 0 */ - { U32 const lastR = ZSTD_REP_NUM + ll0; - U32 repCode; - for (repCode = ll0; repCode < lastR; repCode++) { - U32 const repOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode]; - U32 const repIndex = current - repOffset; - U32 repLen = 0; - assert(current >= dictLimit); - if (repOffset-1 /* intentional overflow, discards 0 and -1 */ < current-dictLimit) { /* equivalent to `current > repIndex >= dictLimit` */ - /* We must validate the repcode offset because when we're using a dictionary the - * valid offset range shrinks when the dictionary goes out of bounds. - */ - if ((repIndex >= windowLow) & (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(ip - repOffset, minMatch))) { - repLen = (U32)ZSTD_count(ip+minMatch, ip+minMatch-repOffset, iLimit) + minMatch; - } - } else { /* repIndex < dictLimit || repIndex >= current */ - const BYTE* const repMatch = dictMode == ZSTD_dictMatchState ? - dmsBase + repIndex - dmsIndexDelta : - dictBase + repIndex; - assert(current >= windowLow); - if ( dictMode == ZSTD_extDict - && ( ((repOffset-1) /*intentional overflow*/ < current - windowLow) /* equivalent to `current > repIndex >= windowLow` */ - & (((U32)((dictLimit-1) - repIndex) >= 3) ) /* intentional overflow : do not test positions overlapping 2 memory segments */) - && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) { - repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dictEnd, prefixStart) + minMatch; - } - if (dictMode == ZSTD_dictMatchState - && ( ((repOffset-1) /*intentional overflow*/ < current - (dmsLowLimit + dmsIndexDelta)) /* equivalent to `current > repIndex >= dmsLowLimit` */ - & ((U32)((dictLimit-1) - repIndex) >= 3) ) /* intentional overflow : do not test positions overlapping 2 memory segments */ - && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) { - repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dmsEnd, prefixStart) + minMatch; - } } - /* save longer solution */ - if (repLen > bestLength) { - DEBUGLOG(8, "found repCode %u (ll0:%u, offset:%u) of length %u", - repCode, ll0, repOffset, repLen); - bestLength = repLen; - matches[mnum].off = repCode - ll0; - matches[mnum].len = (U32)repLen; - mnum++; - if ( (repLen > sufficient_len) - | (ip+repLen == iLimit) ) { /* best possible */ - return mnum; - } } } } - - /* HC3 match finder */ - if ((mls == 3) /*static*/ && (bestLength < mls)) { - U32 const matchIndex3 = ZSTD_insertAndFindFirstIndexHash3(ms, nextToUpdate3, ip); - if ((matchIndex3 >= matchLow) - & (current - matchIndex3 < (1<<18)) /*heuristic : longer distance likely too expensive*/ ) { - size_t mlen; - if ((dictMode == ZSTD_noDict) /*static*/ || (dictMode == ZSTD_dictMatchState) /*static*/ || (matchIndex3 >= dictLimit)) { - const BYTE* const match = base + matchIndex3; - mlen = ZSTD_count(ip, match, iLimit); - } else { - const BYTE* const match = dictBase + matchIndex3; - mlen = ZSTD_count_2segments(ip, match, iLimit, dictEnd, prefixStart); - } - - /* save best solution */ - if (mlen >= mls /* == 3 > bestLength */) { - DEBUGLOG(8, "found small match with hlog3, of length %u", - (U32)mlen); - bestLength = mlen; - assert(current > matchIndex3); - assert(mnum==0); /* no prior solution */ - matches[0].off = (current - matchIndex3) + ZSTD_REP_MOVE; - matches[0].len = (U32)mlen; - mnum = 1; - if ( (mlen > sufficient_len) | - (ip+mlen == iLimit) ) { /* best possible length */ - ms->nextToUpdate = current+1; /* skip insertion */ - return 1; - } } } - /* no dictMatchState lookup: dicts don't have a populated HC3 table */ - } - - hashTable[h] = current; /* Update Hash Table */ - - while (nbCompares-- && (matchIndex >= matchLow)) { - U32* const nextPtr = bt + 2*(matchIndex & btMask); - const BYTE* match; - size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ - assert(current > matchIndex); - - if ((dictMode == ZSTD_noDict) || (dictMode == ZSTD_dictMatchState) || (matchIndex+matchLength >= dictLimit)) { - assert(matchIndex+matchLength >= dictLimit); /* ensure the condition is correct when !extDict */ - match = base + matchIndex; - if (matchIndex >= dictLimit) assert(memcmp(match, ip, matchLength) == 0); /* ensure early section of match is equal as expected */ - matchLength += ZSTD_count(ip+matchLength, match+matchLength, iLimit); - } else { - match = dictBase + matchIndex; - assert(memcmp(match, ip, matchLength) == 0); /* ensure early section of match is equal as expected */ - matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dictEnd, prefixStart); - if (matchIndex+matchLength >= dictLimit) - match = base + matchIndex; /* prepare for match[matchLength] read */ - } - - if (matchLength > bestLength) { - DEBUGLOG(8, "found match of length %u at distance %u (offCode=%u)", - (U32)matchLength, current - matchIndex, current - matchIndex + ZSTD_REP_MOVE); - assert(matchEndIdx > matchIndex); - if (matchLength > matchEndIdx - matchIndex) - matchEndIdx = matchIndex + (U32)matchLength; - bestLength = matchLength; - matches[mnum].off = (current - matchIndex) + ZSTD_REP_MOVE; - matches[mnum].len = (U32)matchLength; - mnum++; - if ( (matchLength > ZSTD_OPT_NUM) - | (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) { - if (dictMode == ZSTD_dictMatchState) nbCompares = 0; /* break should also skip searching dms */ - break; /* drop, to preserve bt consistency (miss a little bit of compression) */ - } - } - - if (match[matchLength] < ip[matchLength]) { - /* match smaller than current */ - *smallerPtr = matchIndex; /* update smaller idx */ - commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */ - if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */ - smallerPtr = nextPtr+1; /* new candidate => larger than match, which was smaller than current */ - matchIndex = nextPtr[1]; /* new matchIndex, larger than previous, closer to current */ - } else { - *largerPtr = matchIndex; - commonLengthLarger = matchLength; - if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */ - largerPtr = nextPtr; - matchIndex = nextPtr[0]; - } } - - *smallerPtr = *largerPtr = 0; - - if (dictMode == ZSTD_dictMatchState && nbCompares) { - size_t const dmsH = ZSTD_hashPtr(ip, dmsHashLog, mls); - U32 dictMatchIndex = dms->hashTable[dmsH]; - const U32* const dmsBt = dms->chainTable; - commonLengthSmaller = commonLengthLarger = 0; - while (nbCompares-- && (dictMatchIndex > dmsLowLimit)) { - const U32* const nextPtr = dmsBt + 2*(dictMatchIndex & dmsBtMask); - size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ - const BYTE* match = dmsBase + dictMatchIndex; - matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dmsEnd, prefixStart); - if (dictMatchIndex+matchLength >= dmsHighLimit) - match = base + dictMatchIndex + dmsIndexDelta; /* to prepare for next usage of match[matchLength] */ - - if (matchLength > bestLength) { - matchIndex = dictMatchIndex + dmsIndexDelta; - DEBUGLOG(8, "found dms match of length %u at distance %u (offCode=%u)", - (U32)matchLength, current - matchIndex, current - matchIndex + ZSTD_REP_MOVE); - if (matchLength > matchEndIdx - matchIndex) - matchEndIdx = matchIndex + (U32)matchLength; - bestLength = matchLength; - matches[mnum].off = (current - matchIndex) + ZSTD_REP_MOVE; - matches[mnum].len = (U32)matchLength; - mnum++; - if ( (matchLength > ZSTD_OPT_NUM) - | (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) { - break; /* drop, to guarantee consistency (miss a little bit of compression) */ - } - } - - if (dictMatchIndex <= dmsBtLow) { break; } /* beyond tree size, stop the search */ - if (match[matchLength] < ip[matchLength]) { - commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */ - dictMatchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */ - } else { - /* match is larger than current */ - commonLengthLarger = matchLength; - dictMatchIndex = nextPtr[0]; - } - } - } - - assert(matchEndIdx > current+8); - ms->nextToUpdate = matchEndIdx - 8; /* skip repetitive patterns */ - return mnum; -} - - -FORCE_INLINE_TEMPLATE U32 ZSTD_BtGetAllMatches ( - ZSTD_match_t* matches, /* store result (match found, increasing size) in this table */ - ZSTD_matchState_t* ms, - U32* nextToUpdate3, - const BYTE* ip, const BYTE* const iHighLimit, const ZSTD_dictMode_e dictMode, - const U32 rep[ZSTD_REP_NUM], - U32 const ll0, - U32 const lengthToBeat) -{ - const ZSTD_compressionParameters* const cParams = &ms->cParams; - U32 const matchLengthSearch = cParams->minMatch; - DEBUGLOG(8, "ZSTD_BtGetAllMatches"); - if (ip < ms->window.base + ms->nextToUpdate) return 0; /* skipped area */ - ZSTD_updateTree_internal(ms, ip, iHighLimit, matchLengthSearch, dictMode); - switch(matchLengthSearch) - { - case 3 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 3); - default : - case 4 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 4); - case 5 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 5); - case 7 : - case 6 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 6); - } -} - - -/*-******************************* -* Optimal parser -*********************************/ - - -static U32 ZSTD_totalLen(ZSTD_optimal_t sol) -{ - return sol.litlen + sol.mlen; -} - -#if 0 /* debug */ - -static void -listStats(const U32* table, int lastEltID) -{ - int const nbElts = lastEltID + 1; - int enb; - for (enb=0; enb < nbElts; enb++) { - (void)table; - /* RAWLOG(2, "%3i:%3i, ", enb, table[enb]); */ - RAWLOG(2, "%4i,", table[enb]); - } - RAWLOG(2, " \n"); -} - -#endif - -FORCE_INLINE_TEMPLATE size_t -ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms, - seqStore_t* seqStore, - U32 rep[ZSTD_REP_NUM], - const void* src, size_t srcSize, - const int optLevel, - const ZSTD_dictMode_e dictMode) -{ - optState_t* const optStatePtr = &ms->opt; - const BYTE* const istart = (const BYTE*)src; - const BYTE* ip = istart; - const BYTE* anchor = istart; - const BYTE* const iend = istart + srcSize; - const BYTE* const ilimit = iend - 8; - const BYTE* const base = ms->window.base; - const BYTE* const prefixStart = base + ms->window.dictLimit; - const ZSTD_compressionParameters* const cParams = &ms->cParams; - - U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1); - U32 const minMatch = (cParams->minMatch == 3) ? 3 : 4; - U32 nextToUpdate3 = ms->nextToUpdate; - - ZSTD_optimal_t* const opt = optStatePtr->priceTable; - ZSTD_match_t* const matches = optStatePtr->matchTable; - ZSTD_optimal_t lastSequence; - - /* init */ - DEBUGLOG(5, "ZSTD_compressBlock_opt_generic: current=%u, prefix=%u, nextToUpdate=%u", - (U32)(ip - base), ms->window.dictLimit, ms->nextToUpdate); - assert(optLevel <= 2); - ZSTD_rescaleFreqs(optStatePtr, (const BYTE*)src, srcSize, optLevel); - ip += (ip==prefixStart); - - /* Match Loop */ - while (ip < ilimit) { - U32 cur, last_pos = 0; - - /* find first match */ - { U32 const litlen = (U32)(ip - anchor); - U32 const ll0 = !litlen; - U32 const nbMatches = ZSTD_BtGetAllMatches(matches, ms, &nextToUpdate3, ip, iend, dictMode, rep, ll0, minMatch); - if (!nbMatches) { ip++; continue; } - - /* initialize opt[0] */ - { U32 i ; for (i=0; i immediate encoding */ - { U32 const maxML = matches[nbMatches-1].len; - U32 const maxOffset = matches[nbMatches-1].off; - DEBUGLOG(6, "found %u matches of maxLength=%u and maxOffCode=%u at cPos=%u => start new series", - nbMatches, maxML, maxOffset, (U32)(ip-prefixStart)); - - if (maxML > sufficient_len) { - lastSequence.litlen = litlen; - lastSequence.mlen = maxML; - lastSequence.off = maxOffset; - DEBUGLOG(6, "large match (%u>%u), immediate encoding", - maxML, sufficient_len); - cur = 0; - last_pos = ZSTD_totalLen(lastSequence); - goto _shortestPath; - } } - - /* set prices for first matches starting position == 0 */ - { U32 const literalsPrice = opt[0].price + ZSTD_litLengthPrice(0, optStatePtr, optLevel); - U32 pos; - U32 matchNb; - for (pos = 1; pos < minMatch; pos++) { - opt[pos].price = ZSTD_MAX_PRICE; /* mlen, litlen and price will be fixed during forward scanning */ - } - for (matchNb = 0; matchNb < nbMatches; matchNb++) { - U32 const offset = matches[matchNb].off; - U32 const end = matches[matchNb].len; - for ( ; pos <= end ; pos++ ) { - U32 const matchPrice = ZSTD_getMatchPrice(offset, pos, optStatePtr, optLevel); - U32 const sequencePrice = literalsPrice + matchPrice; - DEBUGLOG(7, "rPos:%u => set initial price : %.2f", - pos, ZSTD_fCost(sequencePrice)); - opt[pos].mlen = pos; - opt[pos].off = offset; - opt[pos].litlen = litlen; - opt[pos].price = sequencePrice; - } } - last_pos = pos-1; - } - } - - /* check further positions */ - for (cur = 1; cur <= last_pos; cur++) { - const BYTE* const inr = ip + cur; - assert(cur < ZSTD_OPT_NUM); - DEBUGLOG(7, "cPos:%zi==rPos:%u", inr-istart, cur) - - /* Fix current position with one literal if cheaper */ - { U32 const litlen = (opt[cur-1].mlen == 0) ? opt[cur-1].litlen + 1 : 1; - int const price = opt[cur-1].price - + ZSTD_rawLiteralsCost(ip+cur-1, 1, optStatePtr, optLevel) - + ZSTD_litLengthPrice(litlen, optStatePtr, optLevel) - - ZSTD_litLengthPrice(litlen-1, optStatePtr, optLevel); - assert(price < 1000000000); /* overflow check */ - if (price <= opt[cur].price) { - DEBUGLOG(7, "cPos:%zi==rPos:%u : better price (%.2f<=%.2f) using literal (ll==%u) (hist:%u,%u,%u)", - inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price), litlen, - opt[cur-1].rep[0], opt[cur-1].rep[1], opt[cur-1].rep[2]); - opt[cur].mlen = 0; - opt[cur].off = 0; - opt[cur].litlen = litlen; - opt[cur].price = price; - } else { - DEBUGLOG(7, "cPos:%zi==rPos:%u : literal would cost more (%.2f>%.2f) (hist:%u,%u,%u)", - inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price), - opt[cur].rep[0], opt[cur].rep[1], opt[cur].rep[2]); - } - } - - /* Set the repcodes of the current position. We must do it here - * because we rely on the repcodes of the 2nd to last sequence being - * correct to set the next chunks repcodes during the backward - * traversal. - */ - ZSTD_STATIC_ASSERT(sizeof(opt[cur].rep) == sizeof(repcodes_t)); - assert(cur >= opt[cur].mlen); - if (opt[cur].mlen != 0) { - U32 const prev = cur - opt[cur].mlen; - repcodes_t newReps = ZSTD_updateRep(opt[prev].rep, opt[cur].off, opt[cur].litlen==0); - memcpy(opt[cur].rep, &newReps, sizeof(repcodes_t)); - } else { - memcpy(opt[cur].rep, opt[cur - 1].rep, sizeof(repcodes_t)); - } - - /* last match must start at a minimum distance of 8 from oend */ - if (inr > ilimit) continue; - - if (cur == last_pos) break; - - if ( (optLevel==0) /*static_test*/ - && (opt[cur+1].price <= opt[cur].price + (BITCOST_MULTIPLIER/2)) ) { - DEBUGLOG(7, "move to next rPos:%u : price is <=", cur+1); - continue; /* skip unpromising positions; about ~+6% speed, -0.01 ratio */ - } - - { U32 const ll0 = (opt[cur].mlen != 0); - U32 const litlen = (opt[cur].mlen == 0) ? opt[cur].litlen : 0; - U32 const previousPrice = opt[cur].price; - U32 const basePrice = previousPrice + ZSTD_litLengthPrice(0, optStatePtr, optLevel); - U32 const nbMatches = ZSTD_BtGetAllMatches(matches, ms, &nextToUpdate3, inr, iend, dictMode, opt[cur].rep, ll0, minMatch); - U32 matchNb; - if (!nbMatches) { - DEBUGLOG(7, "rPos:%u : no match found", cur); - continue; - } - - { U32 const maxML = matches[nbMatches-1].len; - DEBUGLOG(7, "cPos:%zi==rPos:%u, found %u matches, of maxLength=%u", - inr-istart, cur, nbMatches, maxML); - - if ( (maxML > sufficient_len) - || (cur + maxML >= ZSTD_OPT_NUM) ) { - lastSequence.mlen = maxML; - lastSequence.off = matches[nbMatches-1].off; - lastSequence.litlen = litlen; - cur -= (opt[cur].mlen==0) ? opt[cur].litlen : 0; /* last sequence is actually only literals, fix cur to last match - note : may underflow, in which case, it's first sequence, and it's okay */ - last_pos = cur + ZSTD_totalLen(lastSequence); - if (cur > ZSTD_OPT_NUM) cur = 0; /* underflow => first match */ - goto _shortestPath; - } } - - /* set prices using matches found at position == cur */ - for (matchNb = 0; matchNb < nbMatches; matchNb++) { - U32 const offset = matches[matchNb].off; - U32 const lastML = matches[matchNb].len; - U32 const startML = (matchNb>0) ? matches[matchNb-1].len+1 : minMatch; - U32 mlen; - - DEBUGLOG(7, "testing match %u => offCode=%4u, mlen=%2u, llen=%2u", - matchNb, matches[matchNb].off, lastML, litlen); - - for (mlen = lastML; mlen >= startML; mlen--) { /* scan downward */ - U32 const pos = cur + mlen; - int const price = basePrice + ZSTD_getMatchPrice(offset, mlen, optStatePtr, optLevel); - - if ((pos > last_pos) || (price < opt[pos].price)) { - DEBUGLOG(7, "rPos:%u (ml=%2u) => new better price (%.2f<%.2f)", - pos, mlen, ZSTD_fCost(price), ZSTD_fCost(opt[pos].price)); - while (last_pos < pos) { opt[last_pos+1].price = ZSTD_MAX_PRICE; last_pos++; } /* fill empty positions */ - opt[pos].mlen = mlen; - opt[pos].off = offset; - opt[pos].litlen = litlen; - opt[pos].price = price; - } else { - DEBUGLOG(7, "rPos:%u (ml=%2u) => new price is worse (%.2f>=%.2f)", - pos, mlen, ZSTD_fCost(price), ZSTD_fCost(opt[pos].price)); - if (optLevel==0) break; /* early update abort; gets ~+10% speed for about -0.01 ratio loss */ - } - } } } - } /* for (cur = 1; cur <= last_pos; cur++) */ - - lastSequence = opt[last_pos]; - cur = last_pos > ZSTD_totalLen(lastSequence) ? last_pos - ZSTD_totalLen(lastSequence) : 0; /* single sequence, and it starts before `ip` */ - assert(cur < ZSTD_OPT_NUM); /* control overflow*/ - -_shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */ - assert(opt[0].mlen == 0); - - /* Set the next chunk's repcodes based on the repcodes of the beginning - * of the last match, and the last sequence. This avoids us having to - * update them while traversing the sequences. - */ - if (lastSequence.mlen != 0) { - repcodes_t reps = ZSTD_updateRep(opt[cur].rep, lastSequence.off, lastSequence.litlen==0); - memcpy(rep, &reps, sizeof(reps)); - } else { - memcpy(rep, opt[cur].rep, sizeof(repcodes_t)); - } - - { U32 const storeEnd = cur + 1; - U32 storeStart = storeEnd; - U32 seqPos = cur; - - DEBUGLOG(6, "start reverse traversal (last_pos:%u, cur:%u)", - last_pos, cur); (void)last_pos; - assert(storeEnd < ZSTD_OPT_NUM); - DEBUGLOG(6, "last sequence copied into pos=%u (llen=%u,mlen=%u,ofc=%u)", - storeEnd, lastSequence.litlen, lastSequence.mlen, lastSequence.off); - opt[storeEnd] = lastSequence; - while (seqPos > 0) { - U32 const backDist = ZSTD_totalLen(opt[seqPos]); - storeStart--; - DEBUGLOG(6, "sequence from rPos=%u copied into pos=%u (llen=%u,mlen=%u,ofc=%u)", - seqPos, storeStart, opt[seqPos].litlen, opt[seqPos].mlen, opt[seqPos].off); - opt[storeStart] = opt[seqPos]; - seqPos = (seqPos > backDist) ? seqPos - backDist : 0; - } - - /* save sequences */ - DEBUGLOG(6, "sending selected sequences into seqStore") - { U32 storePos; - for (storePos=storeStart; storePos <= storeEnd; storePos++) { - U32 const llen = opt[storePos].litlen; - U32 const mlen = opt[storePos].mlen; - U32 const offCode = opt[storePos].off; - U32 const advance = llen + mlen; - DEBUGLOG(6, "considering seq starting at %zi, llen=%u, mlen=%u", - anchor - istart, (unsigned)llen, (unsigned)mlen); - - if (mlen==0) { /* only literals => must be last "sequence", actually starting a new stream of sequences */ - assert(storePos == storeEnd); /* must be last sequence */ - ip = anchor + llen; /* last "sequence" is a bunch of literals => don't progress anchor */ - continue; /* will finish */ - } - - assert(anchor + llen <= iend); - ZSTD_updateStats(optStatePtr, llen, anchor, offCode, mlen); - ZSTD_storeSeq(seqStore, llen, anchor, iend, offCode, mlen-MINMATCH); - anchor += advance; - ip = anchor; - } } - ZSTD_setBasePrices(optStatePtr, optLevel); - } - } /* while (ip < ilimit) */ - - /* Return the last literals size */ - return (size_t)(iend - anchor); -} - - -size_t ZSTD_compressBlock_btopt( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - const void* src, size_t srcSize) -{ - DEBUGLOG(5, "ZSTD_compressBlock_btopt"); - return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /*optLevel*/, ZSTD_noDict); -} - - -/* used in 2-pass strategy */ -static U32 ZSTD_upscaleStat(unsigned* table, U32 lastEltIndex, int bonus) -{ - U32 s, sum=0; - assert(ZSTD_FREQ_DIV+bonus >= 0); - for (s=0; slitSum = ZSTD_upscaleStat(optPtr->litFreq, MaxLit, 0); - optPtr->litLengthSum = ZSTD_upscaleStat(optPtr->litLengthFreq, MaxLL, 0); - optPtr->matchLengthSum = ZSTD_upscaleStat(optPtr->matchLengthFreq, MaxML, 0); - optPtr->offCodeSum = ZSTD_upscaleStat(optPtr->offCodeFreq, MaxOff, 0); -} - -/* ZSTD_initStats_ultra(): - * make a first compression pass, just to seed stats with more accurate starting values. - * only works on first block, with no dictionary and no ldm. - * this function cannot error, hence its contract must be respected. - */ -static void -ZSTD_initStats_ultra(ZSTD_matchState_t* ms, - seqStore_t* seqStore, - U32 rep[ZSTD_REP_NUM], - const void* src, size_t srcSize) -{ - U32 tmpRep[ZSTD_REP_NUM]; /* updated rep codes will sink here */ - memcpy(tmpRep, rep, sizeof(tmpRep)); - - DEBUGLOG(4, "ZSTD_initStats_ultra (srcSize=%zu)", srcSize); - assert(ms->opt.litLengthSum == 0); /* first block */ - assert(seqStore->sequences == seqStore->sequencesStart); /* no ldm */ - assert(ms->window.dictLimit == ms->window.lowLimit); /* no dictionary */ - assert(ms->window.dictLimit - ms->nextToUpdate <= 1); /* no prefix (note: intentional overflow, defined as 2-complement) */ - - ZSTD_compressBlock_opt_generic(ms, seqStore, tmpRep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict); /* generate stats into ms->opt*/ - - /* invalidate first scan from history */ - ZSTD_resetSeqStore(seqStore); - ms->window.base -= srcSize; - ms->window.dictLimit += (U32)srcSize; - ms->window.lowLimit = ms->window.dictLimit; - ms->nextToUpdate = ms->window.dictLimit; - - /* re-inforce weight of collected statistics */ - ZSTD_upscaleStats(&ms->opt); -} - -size_t ZSTD_compressBlock_btultra( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - const void* src, size_t srcSize) -{ - DEBUGLOG(5, "ZSTD_compressBlock_btultra (srcSize=%zu)", srcSize); - return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict); -} - -size_t ZSTD_compressBlock_btultra2( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - const void* src, size_t srcSize) -{ - U32 const current = (U32)((const BYTE*)src - ms->window.base); - DEBUGLOG(5, "ZSTD_compressBlock_btultra2 (srcSize=%zu)", srcSize); - - /* 2-pass strategy: - * this strategy makes a first pass over first block to collect statistics - * and seed next round's statistics with it. - * After 1st pass, function forgets everything, and starts a new block. - * Consequently, this can only work if no data has been previously loaded in tables, - * aka, no dictionary, no prefix, no ldm preprocessing. - * The compression ratio gain is generally small (~0.5% on first block), - * the cost is 2x cpu time on first block. */ - assert(srcSize <= ZSTD_BLOCKSIZE_MAX); - if ( (ms->opt.litLengthSum==0) /* first block */ - && (seqStore->sequences == seqStore->sequencesStart) /* no ldm */ - && (ms->window.dictLimit == ms->window.lowLimit) /* no dictionary */ - && (current == ms->window.dictLimit) /* start of frame, nothing already loaded nor skipped */ - && (srcSize > ZSTD_PREDEF_THRESHOLD) - ) { - ZSTD_initStats_ultra(ms, seqStore, rep, src, srcSize); - } - - return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict); -} - -size_t ZSTD_compressBlock_btopt_dictMatchState( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - const void* src, size_t srcSize) -{ - return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /*optLevel*/, ZSTD_dictMatchState); -} - -size_t ZSTD_compressBlock_btultra_dictMatchState( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - const void* src, size_t srcSize) -{ - return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_dictMatchState); -} - -size_t ZSTD_compressBlock_btopt_extDict( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - const void* src, size_t srcSize) -{ - return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /*optLevel*/, ZSTD_extDict); -} - -size_t ZSTD_compressBlock_btultra_extDict( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - const void* src, size_t srcSize) -{ - return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_extDict); -} - -/* note : no btultra2 variant for extDict nor dictMatchState, - * because btultra2 is not meant to work with dictionaries - * and is only specific for the first block (no prefix) */ - -} - - -// LICENSE_CHANGE_END - - -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #4 -// See the end of this file for a list - -/* ****************************************************************** - * huff0 huffman decoder, - * part of Finite State Entropy library - * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc. - * - * You can contact the author at : - * - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. -****************************************************************** */ - -/* ************************************************************** -* Dependencies -****************************************************************/ -#include /* memcpy, memset */ - - /* BIT_* */ - /* to compress headers */ - - - - -namespace duckdb_zstd { -/* ************************************************************** -* Macros -****************************************************************/ - -/* These two optional macros force the use one way or another of the two - * Huffman decompression implementations. You can't force in both directions - * at the same time. - */ -#if defined(HUF_FORCE_DECOMPRESS_X1) && \ - defined(HUF_FORCE_DECOMPRESS_X2) -#error "Cannot force the use of the X1 and X2 decoders at the same time!" -#endif - - -/* ************************************************************** -* Error Management -****************************************************************/ -// #define HUF_isError ERR_isError - - -/* ************************************************************** -* Byte alignment for workSpace management -****************************************************************/ -#define HUF_ALIGN(x, a) HUF_ALIGN_MASK((x), (a) - 1) -#define HUF_ALIGN_MASK(x, mask) (((x) + (mask)) & ~(mask)) - - -/* ************************************************************** -* BMI2 Variant Wrappers -****************************************************************/ -#if DYNAMIC_BMI2 - -#define HUF_DGEN(fn) \ - \ - static size_t fn##_default( \ - void* dst, size_t dstSize, \ - const void* cSrc, size_t cSrcSize, \ - const HUF_DTable* DTable) \ - { \ - return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \ - } \ - \ - static TARGET_ATTRIBUTE("bmi2") size_t fn##_bmi2( \ - void* dst, size_t dstSize, \ - const void* cSrc, size_t cSrcSize, \ - const HUF_DTable* DTable) \ - { \ - return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \ - } \ - \ - static size_t fn(void* dst, size_t dstSize, void const* cSrc, \ - size_t cSrcSize, HUF_DTable const* DTable, int bmi2) \ - { \ - if (bmi2) { \ - return fn##_bmi2(dst, dstSize, cSrc, cSrcSize, DTable); \ - } \ - return fn##_default(dst, dstSize, cSrc, cSrcSize, DTable); \ - } - -#else - -#define HUF_DGEN(fn) \ - static size_t fn(void* dst, size_t dstSize, void const* cSrc, \ - size_t cSrcSize, HUF_DTable const* DTable, int bmi2) \ - { \ - (void)bmi2; \ - return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \ - } - -#endif - - -/*-***************************/ -/* generic DTableDesc */ -/*-***************************/ -typedef struct { BYTE maxTableLog; BYTE tableType; BYTE tableLog; BYTE reserved; } DTableDesc; - -static DTableDesc HUF_getDTableDesc(const HUF_DTable* table) -{ - DTableDesc dtd; - memcpy(&dtd, table, sizeof(dtd)); - return dtd; -} - - -#ifndef HUF_FORCE_DECOMPRESS_X2 - -/*-***************************/ -/* single-symbol decoding */ -/*-***************************/ -typedef struct { BYTE byte; BYTE nbBits; } HUF_DEltX1; /* single-symbol decoding */ - -size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize) -{ - U32 tableLog = 0; - U32 nbSymbols = 0; - size_t iSize; - void* const dtPtr = DTable + 1; - HUF_DEltX1* const dt = (HUF_DEltX1*)dtPtr; - - U32* rankVal; - BYTE* huffWeight; - size_t spaceUsed32 = 0; - - rankVal = (U32 *)workSpace + spaceUsed32; - spaceUsed32 += HUF_TABLELOG_ABSOLUTEMAX + 1; - huffWeight = (BYTE *)((U32 *)workSpace + spaceUsed32); - spaceUsed32 += HUF_ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2; - - if ((spaceUsed32 << 2) > wkspSize) return ERROR(tableLog_tooLarge); - - DEBUG_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUF_DTable)); - /* memset(huffWeight, 0, sizeof(huffWeight)); */ /* is not necessary, even though some analyzer complain ... */ - - iSize = HUF_readStats(huffWeight, HUF_SYMBOLVALUE_MAX + 1, rankVal, &nbSymbols, &tableLog, src, srcSize); - if (HUF_isError(iSize)) return iSize; - - /* Table header */ - { DTableDesc dtd = HUF_getDTableDesc(DTable); - if (tableLog > (U32)(dtd.maxTableLog+1)) return ERROR(tableLog_tooLarge); /* DTable too small, Huffman tree cannot fit in */ - dtd.tableType = 0; - dtd.tableLog = (BYTE)tableLog; - memcpy(DTable, &dtd, sizeof(dtd)); - } - - /* Calculate starting value for each rank */ - { U32 n, nextRankStart = 0; - for (n=1; n> 1; - size_t const uStart = rankVal[w]; - size_t const uEnd = uStart + length; - size_t u; - HUF_DEltX1 D; - D.byte = (BYTE)n; - D.nbBits = (BYTE)(tableLog + 1 - w); - rankVal[w] = (U32)uEnd; - if (length < 4) { - /* Use length in the loop bound so the compiler knows it is short. */ - for (u = 0; u < length; ++u) - dt[uStart + u] = D; - } else { - /* Unroll the loop 4 times, we know it is a power of 2. */ - for (u = uStart; u < uEnd; u += 4) { - dt[u + 0] = D; - dt[u + 1] = D; - dt[u + 2] = D; - dt[u + 3] = D; - } } } } - return iSize; -} - -size_t HUF_readDTableX1(HUF_DTable* DTable, const void* src, size_t srcSize) -{ - U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; - return HUF_readDTableX1_wksp(DTable, src, srcSize, - workSpace, sizeof(workSpace)); -} - -FORCE_INLINE_TEMPLATE BYTE -HUF_decodeSymbolX1(BIT_DStream_t* Dstream, const HUF_DEltX1* dt, const U32 dtLog) -{ - size_t const val = BIT_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */ - BYTE const c = dt[val].byte; - BIT_skipBits(Dstream, dt[val].nbBits); - return c; -} - -#define HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr) \ - *ptr++ = HUF_decodeSymbolX1(DStreamPtr, dt, dtLog) - -#define HUF_DECODE_SYMBOLX1_1(ptr, DStreamPtr) \ - if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \ - HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr) - -#define HUF_DECODE_SYMBOLX1_2(ptr, DStreamPtr) \ - if (MEM_64bits()) \ - HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr) - -HINT_INLINE size_t -HUF_decodeStreamX1(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX1* const dt, const U32 dtLog) -{ - BYTE* const pStart = p; - - /* up to 4 symbols at a time */ - while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-3)) { - HUF_DECODE_SYMBOLX1_2(p, bitDPtr); - HUF_DECODE_SYMBOLX1_1(p, bitDPtr); - HUF_DECODE_SYMBOLX1_2(p, bitDPtr); - HUF_DECODE_SYMBOLX1_0(p, bitDPtr); - } - - /* [0-3] symbols remaining */ - if (MEM_32bits()) - while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd)) - HUF_DECODE_SYMBOLX1_0(p, bitDPtr); - - /* no more data to retrieve from bitstream, no need to reload */ - while (p < pEnd) - HUF_DECODE_SYMBOLX1_0(p, bitDPtr); - - return pEnd-pStart; -} - -FORCE_INLINE_TEMPLATE size_t -HUF_decompress1X1_usingDTable_internal_body( - void* dst, size_t dstSize, - const void* cSrc, size_t cSrcSize, - const HUF_DTable* DTable) -{ - BYTE* op = (BYTE*)dst; - BYTE* const oend = op + dstSize; - const void* dtPtr = DTable + 1; - const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr; - BIT_DStream_t bitD; - DTableDesc const dtd = HUF_getDTableDesc(DTable); - U32 const dtLog = dtd.tableLog; - - CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) ); - - HUF_decodeStreamX1(op, &bitD, oend, dt, dtLog); - - if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected); - - return dstSize; -} - -FORCE_INLINE_TEMPLATE size_t -HUF_decompress4X1_usingDTable_internal_body( - void* dst, size_t dstSize, - const void* cSrc, size_t cSrcSize, - const HUF_DTable* DTable) -{ - /* Check */ - if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */ - - { const BYTE* const istart = (const BYTE*) cSrc; - BYTE* const ostart = (BYTE*) dst; - BYTE* const oend = ostart + dstSize; - BYTE* const olimit = oend - 3; - const void* const dtPtr = DTable + 1; - const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr; - - /* Init */ - BIT_DStream_t bitD1; - BIT_DStream_t bitD2; - BIT_DStream_t bitD3; - BIT_DStream_t bitD4; - size_t const length1 = MEM_readLE16(istart); - size_t const length2 = MEM_readLE16(istart+2); - size_t const length3 = MEM_readLE16(istart+4); - size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6); - const BYTE* const istart1 = istart + 6; /* jumpTable */ - const BYTE* const istart2 = istart1 + length1; - const BYTE* const istart3 = istart2 + length2; - const BYTE* const istart4 = istart3 + length3; - const size_t segmentSize = (dstSize+3) / 4; - BYTE* const opStart2 = ostart + segmentSize; - BYTE* const opStart3 = opStart2 + segmentSize; - BYTE* const opStart4 = opStart3 + segmentSize; - BYTE* op1 = ostart; - BYTE* op2 = opStart2; - BYTE* op3 = opStart3; - BYTE* op4 = opStart4; - DTableDesc const dtd = HUF_getDTableDesc(DTable); - U32 const dtLog = dtd.tableLog; - U32 endSignal = 1; - - if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */ - CHECK_F( BIT_initDStream(&bitD1, istart1, length1) ); - CHECK_F( BIT_initDStream(&bitD2, istart2, length2) ); - CHECK_F( BIT_initDStream(&bitD3, istart3, length3) ); - CHECK_F( BIT_initDStream(&bitD4, istart4, length4) ); - - /* up to 16 symbols per loop (4 symbols per stream) in 64-bit mode */ - for ( ; (endSignal) & (op4 < olimit) ; ) { - HUF_DECODE_SYMBOLX1_2(op1, &bitD1); - HUF_DECODE_SYMBOLX1_2(op2, &bitD2); - HUF_DECODE_SYMBOLX1_2(op3, &bitD3); - HUF_DECODE_SYMBOLX1_2(op4, &bitD4); - HUF_DECODE_SYMBOLX1_1(op1, &bitD1); - HUF_DECODE_SYMBOLX1_1(op2, &bitD2); - HUF_DECODE_SYMBOLX1_1(op3, &bitD3); - HUF_DECODE_SYMBOLX1_1(op4, &bitD4); - HUF_DECODE_SYMBOLX1_2(op1, &bitD1); - HUF_DECODE_SYMBOLX1_2(op2, &bitD2); - HUF_DECODE_SYMBOLX1_2(op3, &bitD3); - HUF_DECODE_SYMBOLX1_2(op4, &bitD4); - HUF_DECODE_SYMBOLX1_0(op1, &bitD1); - HUF_DECODE_SYMBOLX1_0(op2, &bitD2); - HUF_DECODE_SYMBOLX1_0(op3, &bitD3); - HUF_DECODE_SYMBOLX1_0(op4, &bitD4); - endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished; - endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished; - endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished; - endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished; - } - - /* check corruption */ - /* note : should not be necessary : op# advance in lock step, and we control op4. - * but curiously, binary generated by gcc 7.2 & 7.3 with -mbmi2 runs faster when >=1 test is present */ - if (op1 > opStart2) return ERROR(corruption_detected); - if (op2 > opStart3) return ERROR(corruption_detected); - if (op3 > opStart4) return ERROR(corruption_detected); - /* note : op4 supposed already verified within main loop */ - - /* finish bitStreams one by one */ - HUF_decodeStreamX1(op1, &bitD1, opStart2, dt, dtLog); - HUF_decodeStreamX1(op2, &bitD2, opStart3, dt, dtLog); - HUF_decodeStreamX1(op3, &bitD3, opStart4, dt, dtLog); - HUF_decodeStreamX1(op4, &bitD4, oend, dt, dtLog); - - /* check */ - { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4); - if (!endCheck) return ERROR(corruption_detected); } - - /* decoded size */ - return dstSize; - } -} - - -typedef size_t (*HUF_decompress_usingDTable_t)(void *dst, size_t dstSize, - const void *cSrc, - size_t cSrcSize, - const HUF_DTable *DTable); - -HUF_DGEN(HUF_decompress1X1_usingDTable_internal) -HUF_DGEN(HUF_decompress4X1_usingDTable_internal) - - - -size_t HUF_decompress1X1_usingDTable( - void* dst, size_t dstSize, - const void* cSrc, size_t cSrcSize, - const HUF_DTable* DTable) -{ - DTableDesc dtd = HUF_getDTableDesc(DTable); - if (dtd.tableType != 0) return ERROR(GENERIC); - return HUF_decompress1X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); -} - -size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize, - const void* cSrc, size_t cSrcSize, - void* workSpace, size_t wkspSize) -{ - const BYTE* ip = (const BYTE*) cSrc; - - size_t const hSize = HUF_readDTableX1_wksp(DCtx, cSrc, cSrcSize, workSpace, wkspSize); - if (HUF_isError(hSize)) return hSize; - if (hSize >= cSrcSize) return ERROR(srcSize_wrong); - ip += hSize; cSrcSize -= hSize; - - return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0); -} - - -size_t HUF_decompress1X1_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize, - const void* cSrc, size_t cSrcSize) -{ - U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; - return HUF_decompress1X1_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize, - workSpace, sizeof(workSpace)); -} - -size_t HUF_decompress1X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) -{ - HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX); - return HUF_decompress1X1_DCtx (DTable, dst, dstSize, cSrc, cSrcSize); -} - -size_t HUF_decompress4X1_usingDTable( - void* dst, size_t dstSize, - const void* cSrc, size_t cSrcSize, - const HUF_DTable* DTable) -{ - DTableDesc dtd = HUF_getDTableDesc(DTable); - if (dtd.tableType != 0) return ERROR(GENERIC); - return HUF_decompress4X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); -} - -static size_t HUF_decompress4X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, - const void* cSrc, size_t cSrcSize, - void* workSpace, size_t wkspSize, int bmi2) -{ - const BYTE* ip = (const BYTE*) cSrc; - - size_t const hSize = HUF_readDTableX1_wksp (dctx, cSrc, cSrcSize, - workSpace, wkspSize); - if (HUF_isError(hSize)) return hSize; - if (hSize >= cSrcSize) return ERROR(srcSize_wrong); - ip += hSize; cSrcSize -= hSize; - - return HUF_decompress4X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2); -} - -size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, - const void* cSrc, size_t cSrcSize, - void* workSpace, size_t wkspSize) -{ - return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, 0); -} - - -size_t HUF_decompress4X1_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) -{ - U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; - return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, - workSpace, sizeof(workSpace)); -} -size_t HUF_decompress4X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) -{ - HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX); - return HUF_decompress4X1_DCtx(DTable, dst, dstSize, cSrc, cSrcSize); -} - -#endif /* HUF_FORCE_DECOMPRESS_X2 */ - - -#ifndef HUF_FORCE_DECOMPRESS_X1 - -/* *************************/ -/* double-symbols decoding */ -/* *************************/ - -typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUF_DEltX2; /* double-symbols decoding */ -typedef struct { BYTE symbol; BYTE weight; } sortedSymbol_t; -typedef U32 rankValCol_t[HUF_TABLELOG_MAX + 1]; -typedef rankValCol_t rankVal_t[HUF_TABLELOG_MAX]; - - -/* HUF_fillDTableX2Level2() : - * `rankValOrigin` must be a table of at least (HUF_TABLELOG_MAX + 1) U32 */ -static void HUF_fillDTableX2Level2(HUF_DEltX2* DTable, U32 sizeLog, const U32 consumed, - const U32* rankValOrigin, const int minWeight, - const sortedSymbol_t* sortedSymbols, const U32 sortedListSize, - U32 nbBitsBaseline, U16 baseSeq) -{ - HUF_DEltX2 DElt; - U32 rankVal[HUF_TABLELOG_MAX + 1]; - - /* get pre-calculated rankVal */ - memcpy(rankVal, rankValOrigin, sizeof(rankVal)); - - /* fill skipped values */ - if (minWeight>1) { - U32 i, skipSize = rankVal[minWeight]; - MEM_writeLE16(&(DElt.sequence), baseSeq); - DElt.nbBits = (BYTE)(consumed); - DElt.length = 1; - for (i = 0; i < skipSize; i++) - DTable[i] = DElt; - } - - /* fill DTable */ - { U32 s; for (s=0; s= 1 */ - - rankVal[weight] += length; - } } -} - - -static void HUF_fillDTableX2(HUF_DEltX2* DTable, const U32 targetLog, - const sortedSymbol_t* sortedList, const U32 sortedListSize, - const U32* rankStart, rankVal_t rankValOrigin, const U32 maxWeight, - const U32 nbBitsBaseline) -{ - U32 rankVal[HUF_TABLELOG_MAX + 1]; - const int scaleLog = nbBitsBaseline - targetLog; /* note : targetLog >= srcLog, hence scaleLog <= 1 */ - const U32 minBits = nbBitsBaseline - maxWeight; - U32 s; - - memcpy(rankVal, rankValOrigin, sizeof(rankVal)); - - /* fill DTable */ - for (s=0; s= minBits) { /* enough room for a second symbol */ - U32 sortedRank; - int minWeight = nbBits + scaleLog; - if (minWeight < 1) minWeight = 1; - sortedRank = rankStart[minWeight]; - HUF_fillDTableX2Level2(DTable+start, targetLog-nbBits, nbBits, - rankValOrigin[nbBits], minWeight, - sortedList+sortedRank, sortedListSize-sortedRank, - nbBitsBaseline, symbol); - } else { - HUF_DEltX2 DElt; - MEM_writeLE16(&(DElt.sequence), symbol); - DElt.nbBits = (BYTE)(nbBits); - DElt.length = 1; - { U32 const end = start + length; - U32 u; - for (u = start; u < end; u++) DTable[u] = DElt; - } } - rankVal[weight] += length; - } -} - -size_t HUF_readDTableX2_wksp(HUF_DTable* DTable, - const void* src, size_t srcSize, - void* workSpace, size_t wkspSize) -{ - U32 tableLog, maxW, sizeOfSort, nbSymbols; - DTableDesc dtd = HUF_getDTableDesc(DTable); - U32 const maxTableLog = dtd.maxTableLog; - size_t iSize; - void* dtPtr = DTable+1; /* force compiler to avoid strict-aliasing */ - HUF_DEltX2* const dt = (HUF_DEltX2*)dtPtr; - U32 *rankStart; - - rankValCol_t* rankVal; - U32* rankStats; - U32* rankStart0; - sortedSymbol_t* sortedSymbol; - BYTE* weightList; - size_t spaceUsed32 = 0; - - rankVal = (rankValCol_t *)((U32 *)workSpace + spaceUsed32); - spaceUsed32 += (sizeof(rankValCol_t) * HUF_TABLELOG_MAX) >> 2; - rankStats = (U32 *)workSpace + spaceUsed32; - spaceUsed32 += HUF_TABLELOG_MAX + 1; - rankStart0 = (U32 *)workSpace + spaceUsed32; - spaceUsed32 += HUF_TABLELOG_MAX + 2; - sortedSymbol = (sortedSymbol_t *)workSpace + (spaceUsed32 * sizeof(U32)) / sizeof(sortedSymbol_t); - spaceUsed32 += HUF_ALIGN(sizeof(sortedSymbol_t) * (HUF_SYMBOLVALUE_MAX + 1), sizeof(U32)) >> 2; - weightList = (BYTE *)((U32 *)workSpace + spaceUsed32); - spaceUsed32 += HUF_ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2; - - if ((spaceUsed32 << 2) > wkspSize) return ERROR(tableLog_tooLarge); - - rankStart = rankStart0 + 1; - memset(rankStats, 0, sizeof(U32) * (2 * HUF_TABLELOG_MAX + 2 + 1)); - - DEBUG_STATIC_ASSERT(sizeof(HUF_DEltX2) == sizeof(HUF_DTable)); /* if compiler fails here, assertion is wrong */ - if (maxTableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge); - /* memset(weightList, 0, sizeof(weightList)); */ /* is not necessary, even though some analyzer complain ... */ - - iSize = HUF_readStats(weightList, HUF_SYMBOLVALUE_MAX + 1, rankStats, &nbSymbols, &tableLog, src, srcSize); - if (HUF_isError(iSize)) return iSize; - - /* check result */ - if (tableLog > maxTableLog) return ERROR(tableLog_tooLarge); /* DTable can't fit code depth */ - - /* find maxWeight */ - for (maxW = tableLog; rankStats[maxW]==0; maxW--) {} /* necessarily finds a solution before 0 */ - - /* Get start index of each weight */ - { U32 w, nextRankStart = 0; - for (w=1; w> consumed; - } } } } - - HUF_fillDTableX2(dt, maxTableLog, - sortedSymbol, sizeOfSort, - rankStart0, rankVal, maxW, - tableLog+1); - - dtd.tableLog = (BYTE)maxTableLog; - dtd.tableType = 1; - memcpy(DTable, &dtd, sizeof(dtd)); - return iSize; -} - -size_t HUF_readDTableX2(HUF_DTable* DTable, const void* src, size_t srcSize) -{ - U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; - return HUF_readDTableX2_wksp(DTable, src, srcSize, - workSpace, sizeof(workSpace)); -} - - -FORCE_INLINE_TEMPLATE U32 -HUF_decodeSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog) -{ - size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */ - memcpy(op, dt+val, 2); - BIT_skipBits(DStream, dt[val].nbBits); - return dt[val].length; -} - -FORCE_INLINE_TEMPLATE U32 -HUF_decodeLastSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog) -{ - size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */ - memcpy(op, dt+val, 1); - if (dt[val].length==1) BIT_skipBits(DStream, dt[val].nbBits); - else { - if (DStream->bitsConsumed < (sizeof(DStream->bitContainer)*8)) { - BIT_skipBits(DStream, dt[val].nbBits); - if (DStream->bitsConsumed > (sizeof(DStream->bitContainer)*8)) - /* ugly hack; works only because it's the last symbol. Note : can't easily extract nbBits from just this symbol */ - DStream->bitsConsumed = (sizeof(DStream->bitContainer)*8); - } } - return 1; -} - -#define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) \ - ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog) - -#define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \ - if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \ - ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog) - -#define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \ - if (MEM_64bits()) \ - ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog) - -HINT_INLINE size_t -HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd, - const HUF_DEltX2* const dt, const U32 dtLog) -{ - BYTE* const pStart = p; - - /* up to 8 symbols at a time */ - while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-(sizeof(bitDPtr->bitContainer)-1))) { - HUF_DECODE_SYMBOLX2_2(p, bitDPtr); - HUF_DECODE_SYMBOLX2_1(p, bitDPtr); - HUF_DECODE_SYMBOLX2_2(p, bitDPtr); - HUF_DECODE_SYMBOLX2_0(p, bitDPtr); - } - - /* closer to end : up to 2 symbols at a time */ - while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p <= pEnd-2)) - HUF_DECODE_SYMBOLX2_0(p, bitDPtr); - - while (p <= pEnd-2) - HUF_DECODE_SYMBOLX2_0(p, bitDPtr); /* no need to reload : reached the end of DStream */ - - if (p < pEnd) - p += HUF_decodeLastSymbolX2(p, bitDPtr, dt, dtLog); - - return p-pStart; -} - -FORCE_INLINE_TEMPLATE size_t -HUF_decompress1X2_usingDTable_internal_body( - void* dst, size_t dstSize, - const void* cSrc, size_t cSrcSize, - const HUF_DTable* DTable) -{ - BIT_DStream_t bitD; - - /* Init */ - CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) ); - - /* decode */ - { BYTE* const ostart = (BYTE*) dst; - BYTE* const oend = ostart + dstSize; - const void* const dtPtr = DTable+1; /* force compiler to not use strict-aliasing */ - const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr; - DTableDesc const dtd = HUF_getDTableDesc(DTable); - HUF_decodeStreamX2(ostart, &bitD, oend, dt, dtd.tableLog); - } - - /* check */ - if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected); - - /* decoded size */ - return dstSize; -} - -FORCE_INLINE_TEMPLATE size_t -HUF_decompress4X2_usingDTable_internal_body( - void* dst, size_t dstSize, - const void* cSrc, size_t cSrcSize, - const HUF_DTable* DTable) -{ - if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */ - - { const BYTE* const istart = (const BYTE*) cSrc; - BYTE* const ostart = (BYTE*) dst; - BYTE* const oend = ostart + dstSize; - BYTE* const olimit = oend - (sizeof(size_t)-1); - const void* const dtPtr = DTable+1; - const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr; - - /* Init */ - BIT_DStream_t bitD1; - BIT_DStream_t bitD2; - BIT_DStream_t bitD3; - BIT_DStream_t bitD4; - size_t const length1 = MEM_readLE16(istart); - size_t const length2 = MEM_readLE16(istart+2); - size_t const length3 = MEM_readLE16(istart+4); - size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6); - const BYTE* const istart1 = istart + 6; /* jumpTable */ - const BYTE* const istart2 = istart1 + length1; - const BYTE* const istart3 = istart2 + length2; - const BYTE* const istart4 = istart3 + length3; - size_t const segmentSize = (dstSize+3) / 4; - BYTE* const opStart2 = ostart + segmentSize; - BYTE* const opStart3 = opStart2 + segmentSize; - BYTE* const opStart4 = opStart3 + segmentSize; - BYTE* op1 = ostart; - BYTE* op2 = opStart2; - BYTE* op3 = opStart3; - BYTE* op4 = opStart4; - U32 endSignal = 1; - DTableDesc const dtd = HUF_getDTableDesc(DTable); - U32 const dtLog = dtd.tableLog; - - if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */ - CHECK_F( BIT_initDStream(&bitD1, istart1, length1) ); - CHECK_F( BIT_initDStream(&bitD2, istart2, length2) ); - CHECK_F( BIT_initDStream(&bitD3, istart3, length3) ); - CHECK_F( BIT_initDStream(&bitD4, istart4, length4) ); - - /* 16-32 symbols per loop (4-8 symbols per stream) */ - for ( ; (endSignal) & (op4 < olimit); ) { -#if defined(__clang__) && (defined(__x86_64__) || defined(__i386__)) - HUF_DECODE_SYMBOLX2_2(op1, &bitD1); - HUF_DECODE_SYMBOLX2_1(op1, &bitD1); - HUF_DECODE_SYMBOLX2_2(op1, &bitD1); - HUF_DECODE_SYMBOLX2_0(op1, &bitD1); - HUF_DECODE_SYMBOLX2_2(op2, &bitD2); - HUF_DECODE_SYMBOLX2_1(op2, &bitD2); - HUF_DECODE_SYMBOLX2_2(op2, &bitD2); - HUF_DECODE_SYMBOLX2_0(op2, &bitD2); - endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished; - endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished; - HUF_DECODE_SYMBOLX2_2(op3, &bitD3); - HUF_DECODE_SYMBOLX2_1(op3, &bitD3); - HUF_DECODE_SYMBOLX2_2(op3, &bitD3); - HUF_DECODE_SYMBOLX2_0(op3, &bitD3); - HUF_DECODE_SYMBOLX2_2(op4, &bitD4); - HUF_DECODE_SYMBOLX2_1(op4, &bitD4); - HUF_DECODE_SYMBOLX2_2(op4, &bitD4); - HUF_DECODE_SYMBOLX2_0(op4, &bitD4); - endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished; - endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished; -#else - HUF_DECODE_SYMBOLX2_2(op1, &bitD1); - HUF_DECODE_SYMBOLX2_2(op2, &bitD2); - HUF_DECODE_SYMBOLX2_2(op3, &bitD3); - HUF_DECODE_SYMBOLX2_2(op4, &bitD4); - HUF_DECODE_SYMBOLX2_1(op1, &bitD1); - HUF_DECODE_SYMBOLX2_1(op2, &bitD2); - HUF_DECODE_SYMBOLX2_1(op3, &bitD3); - HUF_DECODE_SYMBOLX2_1(op4, &bitD4); - HUF_DECODE_SYMBOLX2_2(op1, &bitD1); - HUF_DECODE_SYMBOLX2_2(op2, &bitD2); - HUF_DECODE_SYMBOLX2_2(op3, &bitD3); - HUF_DECODE_SYMBOLX2_2(op4, &bitD4); - HUF_DECODE_SYMBOLX2_0(op1, &bitD1); - HUF_DECODE_SYMBOLX2_0(op2, &bitD2); - HUF_DECODE_SYMBOLX2_0(op3, &bitD3); - HUF_DECODE_SYMBOLX2_0(op4, &bitD4); - endSignal = (U32)LIKELY( - (BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished) - & (BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished) - & (BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished) - & (BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished)); -#endif - } - - /* check corruption */ - if (op1 > opStart2) return ERROR(corruption_detected); - if (op2 > opStart3) return ERROR(corruption_detected); - if (op3 > opStart4) return ERROR(corruption_detected); - /* note : op4 already verified within main loop */ - - /* finish bitStreams one by one */ - HUF_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog); - HUF_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog); - HUF_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog); - HUF_decodeStreamX2(op4, &bitD4, oend, dt, dtLog); - - /* check */ - { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4); - if (!endCheck) return ERROR(corruption_detected); } - - /* decoded size */ - return dstSize; - } -} - -HUF_DGEN(HUF_decompress1X2_usingDTable_internal) -HUF_DGEN(HUF_decompress4X2_usingDTable_internal) - -size_t HUF_decompress1X2_usingDTable( - void* dst, size_t dstSize, - const void* cSrc, size_t cSrcSize, - const HUF_DTable* DTable) -{ - DTableDesc dtd = HUF_getDTableDesc(DTable); - if (dtd.tableType != 1) return ERROR(GENERIC); - return HUF_decompress1X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); -} - -size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize, - const void* cSrc, size_t cSrcSize, - void* workSpace, size_t wkspSize) -{ - const BYTE* ip = (const BYTE*) cSrc; - - size_t const hSize = HUF_readDTableX2_wksp(DCtx, cSrc, cSrcSize, - workSpace, wkspSize); - if (HUF_isError(hSize)) return hSize; - if (hSize >= cSrcSize) return ERROR(srcSize_wrong); - ip += hSize; cSrcSize -= hSize; - - return HUF_decompress1X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0); -} - - -size_t HUF_decompress1X2_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize, - const void* cSrc, size_t cSrcSize) -{ - U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; - return HUF_decompress1X2_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize, - workSpace, sizeof(workSpace)); -} - -size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) -{ - HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX); - return HUF_decompress1X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize); -} - -size_t HUF_decompress4X2_usingDTable( - void* dst, size_t dstSize, - const void* cSrc, size_t cSrcSize, - const HUF_DTable* DTable) -{ - DTableDesc dtd = HUF_getDTableDesc(DTable); - if (dtd.tableType != 1) return ERROR(GENERIC); - return HUF_decompress4X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); -} - -static size_t HUF_decompress4X2_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, - const void* cSrc, size_t cSrcSize, - void* workSpace, size_t wkspSize, int bmi2) -{ - const BYTE* ip = (const BYTE*) cSrc; - - size_t hSize = HUF_readDTableX2_wksp(dctx, cSrc, cSrcSize, - workSpace, wkspSize); - if (HUF_isError(hSize)) return hSize; - if (hSize >= cSrcSize) return ERROR(srcSize_wrong); - ip += hSize; cSrcSize -= hSize; - - return HUF_decompress4X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2); -} - -size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, - const void* cSrc, size_t cSrcSize, - void* workSpace, size_t wkspSize) -{ - return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, /* bmi2 */ 0); -} - - -size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, - const void* cSrc, size_t cSrcSize) -{ - U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; - return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, - workSpace, sizeof(workSpace)); -} - -size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) -{ - HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX); - return HUF_decompress4X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize); -} - -#endif /* HUF_FORCE_DECOMPRESS_X1 */ - - -/* ***********************************/ -/* Universal decompression selectors */ -/* ***********************************/ - -size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, - const void* cSrc, size_t cSrcSize, - const HUF_DTable* DTable) -{ - DTableDesc const dtd = HUF_getDTableDesc(DTable); -#if defined(HUF_FORCE_DECOMPRESS_X1) - (void)dtd; - assert(dtd.tableType == 0); - return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); -#elif defined(HUF_FORCE_DECOMPRESS_X2) - (void)dtd; - assert(dtd.tableType == 1); - return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); -#else - return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) : - HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); -#endif -} - -size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize, - const void* cSrc, size_t cSrcSize, - const HUF_DTable* DTable) -{ - DTableDesc const dtd = HUF_getDTableDesc(DTable); -#if defined(HUF_FORCE_DECOMPRESS_X1) - (void)dtd; - assert(dtd.tableType == 0); - return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); -#elif defined(HUF_FORCE_DECOMPRESS_X2) - (void)dtd; - assert(dtd.tableType == 1); - return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); -#else - return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) : - HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); -#endif -} - - -#if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2) -typedef struct { U32 tableTime; U32 decode256Time; } algo_time_t; -static const algo_time_t algoTime[16 /* Quantization */][3 /* single, double, quad */] = -{ - /* single, double, quad */ - {{0,0}, {1,1}, {2,2}}, /* Q==0 : impossible */ - {{0,0}, {1,1}, {2,2}}, /* Q==1 : impossible */ - {{ 38,130}, {1313, 74}, {2151, 38}}, /* Q == 2 : 12-18% */ - {{ 448,128}, {1353, 74}, {2238, 41}}, /* Q == 3 : 18-25% */ - {{ 556,128}, {1353, 74}, {2238, 47}}, /* Q == 4 : 25-32% */ - {{ 714,128}, {1418, 74}, {2436, 53}}, /* Q == 5 : 32-38% */ - {{ 883,128}, {1437, 74}, {2464, 61}}, /* Q == 6 : 38-44% */ - {{ 897,128}, {1515, 75}, {2622, 68}}, /* Q == 7 : 44-50% */ - {{ 926,128}, {1613, 75}, {2730, 75}}, /* Q == 8 : 50-56% */ - {{ 947,128}, {1729, 77}, {3359, 77}}, /* Q == 9 : 56-62% */ - {{1107,128}, {2083, 81}, {4006, 84}}, /* Q ==10 : 62-69% */ - {{1177,128}, {2379, 87}, {4785, 88}}, /* Q ==11 : 69-75% */ - {{1242,128}, {2415, 93}, {5155, 84}}, /* Q ==12 : 75-81% */ - {{1349,128}, {2644,106}, {5260,106}}, /* Q ==13 : 81-87% */ - {{1455,128}, {2422,124}, {4174,124}}, /* Q ==14 : 87-93% */ - {{ 722,128}, {1891,145}, {1936,146}}, /* Q ==15 : 93-99% */ -}; -#endif - -/** HUF_selectDecoder() : - * Tells which decoder is likely to decode faster, - * based on a set of pre-computed metrics. - * @return : 0==HUF_decompress4X1, 1==HUF_decompress4X2 . - * Assumption : 0 < dstSize <= 128 KB */ -U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize) -{ - assert(dstSize > 0); - assert(dstSize <= 128*1024); -#if defined(HUF_FORCE_DECOMPRESS_X1) - (void)dstSize; - (void)cSrcSize; - return 0; -#elif defined(HUF_FORCE_DECOMPRESS_X2) - (void)dstSize; - (void)cSrcSize; - return 1; -#else - /* decoder timing evaluation */ - { U32 const Q = (cSrcSize >= dstSize) ? 15 : (U32)(cSrcSize * 16 / dstSize); /* Q < 16 */ - U32 const D256 = (U32)(dstSize >> 8); - U32 const DTime0 = algoTime[Q][0].tableTime + (algoTime[Q][0].decode256Time * D256); - U32 DTime1 = algoTime[Q][1].tableTime + (algoTime[Q][1].decode256Time * D256); - DTime1 += DTime1 >> 3; /* advantage to algorithm using less memory, to reduce cache eviction */ - return DTime1 < DTime0; - } -#endif -} - - -typedef size_t (*decompressionAlgo)(void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); - -size_t HUF_decompress (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) -{ -#if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2) - static const decompressionAlgo decompress[2] = { HUF_decompress4X1, HUF_decompress4X2 }; -#endif - - /* validation checks */ - if (dstSize == 0) return ERROR(dstSize_tooSmall); - if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */ - if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */ - if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */ - - { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); -#if defined(HUF_FORCE_DECOMPRESS_X1) - (void)algoNb; - assert(algoNb == 0); - return HUF_decompress4X1(dst, dstSize, cSrc, cSrcSize); -#elif defined(HUF_FORCE_DECOMPRESS_X2) - (void)algoNb; - assert(algoNb == 1); - return HUF_decompress4X2(dst, dstSize, cSrc, cSrcSize); -#else - return decompress[algoNb](dst, dstSize, cSrc, cSrcSize); -#endif - } -} - -size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) -{ - /* validation checks */ - if (dstSize == 0) return ERROR(dstSize_tooSmall); - if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */ - if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */ - if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */ - - { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); -#if defined(HUF_FORCE_DECOMPRESS_X1) - (void)algoNb; - assert(algoNb == 0); - return HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize); -#elif defined(HUF_FORCE_DECOMPRESS_X2) - (void)algoNb; - assert(algoNb == 1); - return HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize); -#else - return algoNb ? HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) : - HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) ; -#endif - } -} - -size_t HUF_decompress4X_hufOnly(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) -{ - U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; - return HUF_decompress4X_hufOnly_wksp(dctx, dst, dstSize, cSrc, cSrcSize, - workSpace, sizeof(workSpace)); -} - - -size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst, - size_t dstSize, const void* cSrc, - size_t cSrcSize, void* workSpace, - size_t wkspSize) -{ - /* validation checks */ - if (dstSize == 0) return ERROR(dstSize_tooSmall); - if (cSrcSize == 0) return ERROR(corruption_detected); - - { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); -#if defined(HUF_FORCE_DECOMPRESS_X1) - (void)algoNb; - assert(algoNb == 0); - return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize); -#elif defined(HUF_FORCE_DECOMPRESS_X2) - (void)algoNb; - assert(algoNb == 1); - return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize); -#else - return algoNb ? HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, - cSrcSize, workSpace, wkspSize): - HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize); -#endif - } -} - -size_t HUF_decompress1X_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, - const void* cSrc, size_t cSrcSize, - void* workSpace, size_t wkspSize) -{ - /* validation checks */ - if (dstSize == 0) return ERROR(dstSize_tooSmall); - if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */ - if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */ - if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */ - - { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); -#if defined(HUF_FORCE_DECOMPRESS_X1) - (void)algoNb; - assert(algoNb == 0); - return HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc, - cSrcSize, workSpace, wkspSize); -#elif defined(HUF_FORCE_DECOMPRESS_X2) - (void)algoNb; - assert(algoNb == 1); - return HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc, - cSrcSize, workSpace, wkspSize); -#else - return algoNb ? HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc, - cSrcSize, workSpace, wkspSize): - HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc, - cSrcSize, workSpace, wkspSize); -#endif - } -} - -size_t HUF_decompress1X_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, - const void* cSrc, size_t cSrcSize) -{ - U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; - return HUF_decompress1X_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, - workSpace, sizeof(workSpace)); -} - - -size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2) -{ - DTableDesc const dtd = HUF_getDTableDesc(DTable); -#if defined(HUF_FORCE_DECOMPRESS_X1) - (void)dtd; - assert(dtd.tableType == 0); - return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); -#elif defined(HUF_FORCE_DECOMPRESS_X2) - (void)dtd; - assert(dtd.tableType == 1); - return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); -#else - return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) : - HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); -#endif -} - -#ifndef HUF_FORCE_DECOMPRESS_X2 -size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2) -{ - const BYTE* ip = (const BYTE*) cSrc; - - size_t const hSize = HUF_readDTableX1_wksp(dctx, cSrc, cSrcSize, workSpace, wkspSize); - if (HUF_isError(hSize)) return hSize; - if (hSize >= cSrcSize) return ERROR(srcSize_wrong); - ip += hSize; cSrcSize -= hSize; - - return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2); -} -#endif - -size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2) -{ - DTableDesc const dtd = HUF_getDTableDesc(DTable); -#if defined(HUF_FORCE_DECOMPRESS_X1) - (void)dtd; - assert(dtd.tableType == 0); - return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); -#elif defined(HUF_FORCE_DECOMPRESS_X2) - (void)dtd; - assert(dtd.tableType == 1); - return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); -#else - return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) : - HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); -#endif -} - -size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2) -{ - /* validation checks */ - if (dstSize == 0) return ERROR(dstSize_tooSmall); - if (cSrcSize == 0) return ERROR(corruption_detected); - - { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); -#if defined(HUF_FORCE_DECOMPRESS_X1) - (void)algoNb; - assert(algoNb == 0); - return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2); -#elif defined(HUF_FORCE_DECOMPRESS_X2) - (void)algoNb; - assert(algoNb == 1); - return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2); -#else - return algoNb ? HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2) : - HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2); -#endif - } -} - -} - - -// LICENSE_CHANGE_END - - -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #4 -// See the end of this file for a list - -/* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. - */ - -/* zstd_ddict.c : - * concentrates all logic that needs to know the internals of ZSTD_DDict object */ - -/*-******************************************************* -* Dependencies -*********************************************************/ -#include /* memcpy, memmove, memset */ - /* low level memory routines */ - - - - - - -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #4 -// See the end of this file for a list - -/* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. - */ - - -/* zstd_decompress_internal: - * objects and definitions shared within lib/decompress modules */ - - #ifndef ZSTD_DECOMPRESS_INTERNAL_H - #define ZSTD_DECOMPRESS_INTERNAL_H - - -/*-******************************************************* - * Dependencies - *********************************************************/ - /* BYTE, U16, U32 */ - /* ZSTD_seqSymbol */ - -namespace duckdb_zstd { - -/*-******************************************************* - * Constants - *********************************************************/ -struct ZSTDConstants { - static const U32 LL_base[MaxLL+1]; - static const U32 OF_base[MaxOff+1]; - static const U32 OF_bits[MaxOff+1]; - static const U32 ML_base[MaxML+1]; -}; - - -/*-******************************************************* - * Decompression types - *********************************************************/ - typedef struct { - U32 fastMode; - U32 tableLog; - } ZSTD_seqSymbol_header; - - typedef struct { - U16 nextState; - BYTE nbAdditionalBits; - BYTE nbBits; - U32 baseValue; - } ZSTD_seqSymbol; - - #define SEQSYMBOL_TABLE_SIZE(log) (1 + (1 << (log))) - -typedef struct { - ZSTD_seqSymbol LLTable[SEQSYMBOL_TABLE_SIZE(LLFSELog)]; /* Note : Space reserved for FSE Tables */ - ZSTD_seqSymbol OFTable[SEQSYMBOL_TABLE_SIZE(OffFSELog)]; /* is also used as temporary workspace while building hufTable during DDict creation */ - ZSTD_seqSymbol MLTable[SEQSYMBOL_TABLE_SIZE(MLFSELog)]; /* and therefore must be at least HUF_DECOMPRESS_WORKSPACE_SIZE large */ - HUF_DTable hufTable[HUF_DTABLE_SIZE(HufLog)]; /* can accommodate HUF_decompress4X */ - U32 rep[ZSTD_REP_NUM]; -} ZSTD_entropyDTables_t; - -typedef enum { ZSTDds_getFrameHeaderSize, ZSTDds_decodeFrameHeader, - ZSTDds_decodeBlockHeader, ZSTDds_decompressBlock, - ZSTDds_decompressLastBlock, ZSTDds_checkChecksum, - ZSTDds_decodeSkippableHeader, ZSTDds_skipFrame } ZSTD_dStage; - -typedef enum { zdss_init=0, zdss_loadHeader, - zdss_read, zdss_load, zdss_flush } ZSTD_dStreamStage; - -typedef enum { - ZSTD_use_indefinitely = -1, /* Use the dictionary indefinitely */ - ZSTD_dont_use = 0, /* Do not use the dictionary (if one exists free it) */ - ZSTD_use_once = 1 /* Use the dictionary once and set to ZSTD_dont_use */ -} ZSTD_dictUses_e; - -typedef enum { - ZSTD_obm_buffered = 0, /* Buffer the output */ - ZSTD_obm_stable = 1 /* ZSTD_outBuffer is stable */ -} ZSTD_outBufferMode_e; - -struct ZSTD_DCtx_s -{ - const ZSTD_seqSymbol* LLTptr; - const ZSTD_seqSymbol* MLTptr; - const ZSTD_seqSymbol* OFTptr; - const HUF_DTable* HUFptr; - ZSTD_entropyDTables_t entropy; - U32 workspace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; /* space needed when building huffman tables */ - const void* previousDstEnd; /* detect continuity */ - const void* prefixStart; /* start of current segment */ - const void* virtualStart; /* virtual start of previous segment if it was just before current one */ - const void* dictEnd; /* end of previous segment */ - size_t expected; - ZSTD_frameHeader fParams; - U64 decodedSize; - blockType_e bType; /* used in ZSTD_decompressContinue(), store blockType between block header decoding and block decompression stages */ - ZSTD_dStage stage; - U32 litEntropy; - U32 fseEntropy; - XXH64_state_t xxhState; - size_t headerSize; - ZSTD_format_e format; - const BYTE* litPtr; - ZSTD_customMem customMem; - size_t litSize; - size_t rleSize; - size_t staticSize; - int bmi2; /* == 1 if the CPU supports BMI2 and 0 otherwise. CPU support is determined dynamically once per context lifetime. */ - - /* dictionary */ - ZSTD_DDict* ddictLocal; - const ZSTD_DDict* ddict; /* set by ZSTD_initDStream_usingDDict(), or ZSTD_DCtx_refDDict() */ - U32 dictID; - int ddictIsCold; /* if == 1 : dictionary is "new" for working context, and presumed "cold" (not in cpu cache) */ - ZSTD_dictUses_e dictUses; - - /* streaming */ - ZSTD_dStreamStage streamStage; - char* inBuff; - size_t inBuffSize; - size_t inPos; - size_t maxWindowSize; - char* outBuff; - size_t outBuffSize; - size_t outStart; - size_t outEnd; - size_t lhSize; - void* legacyContext; - U32 previousLegacyVersion; - U32 legacyVersion; - U32 hostageByte; - int noForwardProgress; - ZSTD_outBufferMode_e outBufferMode; - ZSTD_outBuffer expectedOutBuffer; - - /* workspace */ - BYTE litBuffer[ZSTD_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH]; - BYTE headerBuffer[ZSTD_FRAMEHEADERSIZE_MAX]; - - size_t oversizedDuration; - -#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION - void const* dictContentBeginForFuzzing; - void const* dictContentEndForFuzzing; -#endif -}; /* typedef'd to ZSTD_DCtx within "zstd.h" */ - - -/*-******************************************************* - * Shared internal functions - *********************************************************/ - -/*! ZSTD_loadDEntropy() : - * dict : must point at beginning of a valid zstd dictionary. - * @return : size of dictionary header (size of magic number + dict ID + entropy tables) */ -size_t ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy, - const void* const dict, size_t const dictSize); - -/*! ZSTD_checkContinuity() : - * check if next `dst` follows previous position, where decompression ended. - * If yes, do nothing (continue on current segment). - * If not, classify previous segment as "external dictionary", and start a new segment. - * This function cannot fail. */ -void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst); - -} - -#endif /* ZSTD_DECOMPRESS_INTERNAL_H */ - - -// LICENSE_CHANGE_END - - - -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #4 -// See the end of this file for a list - -/* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. - */ - - -#ifndef ZSTD_DDICT_H -#define ZSTD_DDICT_H - -/*-******************************************************* - * Dependencies - *********************************************************/ -#include /* size_t */ - /* ZSTD_DDict, and several public functions */ - -namespace duckdb_zstd { -/*-******************************************************* - * Interface - *********************************************************/ - -/* note: several prototypes are already published in `zstd.h` : - * ZSTD_createDDict() - * ZSTD_createDDict_byReference() - * ZSTD_createDDict_advanced() - * ZSTD_freeDDict() - * ZSTD_initStaticDDict() - * ZSTD_sizeof_DDict() - * ZSTD_estimateDDictSize() - * ZSTD_getDictID_fromDict() - */ - -const void* ZSTD_DDict_dictContent(const ZSTD_DDict* ddict); -size_t ZSTD_DDict_dictSize(const ZSTD_DDict* ddict); - -void ZSTD_copyDDictParameters(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict); - -} - -#endif /* ZSTD_DDICT_H */ - - -// LICENSE_CHANGE_END - - -// #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1) -// # include "../legacy/zstd_legacy.h" -// #endif - -namespace duckdb_zstd { - -/*-******************************************************* -* Types -*********************************************************/ -struct ZSTD_DDict_s { - void* dictBuffer; - const void* dictContent; - size_t dictSize; - ZSTD_entropyDTables_t entropy; - U32 dictID; - U32 entropyPresent; - ZSTD_customMem cMem; -}; /* typedef'd to ZSTD_DDict within "zstd.h" */ - -const void* ZSTD_DDict_dictContent(const ZSTD_DDict* ddict) -{ - assert(ddict != NULL); - return ddict->dictContent; -} - -size_t ZSTD_DDict_dictSize(const ZSTD_DDict* ddict) -{ - assert(ddict != NULL); - return ddict->dictSize; -} - -void ZSTD_copyDDictParameters(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict) -{ - DEBUGLOG(4, "ZSTD_copyDDictParameters"); - assert(dctx != NULL); - assert(ddict != NULL); - dctx->dictID = ddict->dictID; - dctx->prefixStart = ddict->dictContent; - dctx->virtualStart = ddict->dictContent; - dctx->dictEnd = (const BYTE*)ddict->dictContent + ddict->dictSize; - dctx->previousDstEnd = dctx->dictEnd; -#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION - dctx->dictContentBeginForFuzzing = dctx->prefixStart; - dctx->dictContentEndForFuzzing = dctx->previousDstEnd; -#endif - if (ddict->entropyPresent) { - dctx->litEntropy = 1; - dctx->fseEntropy = 1; - dctx->LLTptr = ddict->entropy.LLTable; - dctx->MLTptr = ddict->entropy.MLTable; - dctx->OFTptr = ddict->entropy.OFTable; - dctx->HUFptr = ddict->entropy.hufTable; - dctx->entropy.rep[0] = ddict->entropy.rep[0]; - dctx->entropy.rep[1] = ddict->entropy.rep[1]; - dctx->entropy.rep[2] = ddict->entropy.rep[2]; - } else { - dctx->litEntropy = 0; - dctx->fseEntropy = 0; - } -} - - -static size_t -ZSTD_loadEntropy_intoDDict(ZSTD_DDict* ddict, - ZSTD_dictContentType_e dictContentType) -{ - ddict->dictID = 0; - ddict->entropyPresent = 0; - if (dictContentType == ZSTD_dct_rawContent) return 0; - - if (ddict->dictSize < 8) { - if (dictContentType == ZSTD_dct_fullDict) - return ERROR(dictionary_corrupted); /* only accept specified dictionaries */ - return 0; /* pure content mode */ - } - { U32 const magic = MEM_readLE32(ddict->dictContent); - if (magic != ZSTD_MAGIC_DICTIONARY) { - if (dictContentType == ZSTD_dct_fullDict) - return ERROR(dictionary_corrupted); /* only accept specified dictionaries */ - return 0; /* pure content mode */ - } - } - ddict->dictID = MEM_readLE32((const char*)ddict->dictContent + ZSTD_FRAMEIDSIZE); - - /* load entropy tables */ - RETURN_ERROR_IF(ZSTD_isError(ZSTD_loadDEntropy( - &ddict->entropy, ddict->dictContent, ddict->dictSize)), - dictionary_corrupted, ""); - ddict->entropyPresent = 1; - return 0; -} - - -static size_t ZSTD_initDDict_internal(ZSTD_DDict* ddict, - const void* dict, size_t dictSize, - ZSTD_dictLoadMethod_e dictLoadMethod, - ZSTD_dictContentType_e dictContentType) -{ - if ((dictLoadMethod == ZSTD_dlm_byRef) || (!dict) || (!dictSize)) { - ddict->dictBuffer = NULL; - ddict->dictContent = dict; - if (!dict) dictSize = 0; - } else { - void* const internalBuffer = ZSTD_malloc(dictSize, ddict->cMem); - ddict->dictBuffer = internalBuffer; - ddict->dictContent = internalBuffer; - if (!internalBuffer) return ERROR(memory_allocation); - memcpy(internalBuffer, dict, dictSize); - } - ddict->dictSize = dictSize; - ddict->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001); /* cover both little and big endian */ - - /* parse dictionary content */ - FORWARD_IF_ERROR( ZSTD_loadEntropy_intoDDict(ddict, dictContentType) , ""); - - return 0; -} - -ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize, - ZSTD_dictLoadMethod_e dictLoadMethod, - ZSTD_dictContentType_e dictContentType, - ZSTD_customMem customMem) -{ - if (!customMem.customAlloc ^ !customMem.customFree) return NULL; - - { ZSTD_DDict* const ddict = (ZSTD_DDict*) ZSTD_malloc(sizeof(ZSTD_DDict), customMem); - if (ddict == NULL) return NULL; - ddict->cMem = customMem; - { size_t const initResult = ZSTD_initDDict_internal(ddict, - dict, dictSize, - dictLoadMethod, dictContentType); - if (ZSTD_isError(initResult)) { - ZSTD_freeDDict(ddict); - return NULL; - } } - return ddict; - } -} - -/*! ZSTD_createDDict() : -* Create a digested dictionary, to start decompression without startup delay. -* `dict` content is copied inside DDict. -* Consequently, `dict` can be released after `ZSTD_DDict` creation */ -ZSTD_DDict* ZSTD_createDDict(const void* dict, size_t dictSize) -{ - ZSTD_customMem const allocator = { NULL, NULL, NULL }; - return ZSTD_createDDict_advanced(dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto, allocator); -} - -/*! ZSTD_createDDict_byReference() : - * Create a digested dictionary, to start decompression without startup delay. - * Dictionary content is simply referenced, it will be accessed during decompression. - * Warning : dictBuffer must outlive DDict (DDict must be freed before dictBuffer) */ -ZSTD_DDict* ZSTD_createDDict_byReference(const void* dictBuffer, size_t dictSize) -{ - ZSTD_customMem const allocator = { NULL, NULL, NULL }; - return ZSTD_createDDict_advanced(dictBuffer, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto, allocator); -} - - -const ZSTD_DDict* ZSTD_initStaticDDict( - void* sBuffer, size_t sBufferSize, - const void* dict, size_t dictSize, - ZSTD_dictLoadMethod_e dictLoadMethod, - ZSTD_dictContentType_e dictContentType) -{ - size_t const neededSpace = sizeof(ZSTD_DDict) - + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize); - ZSTD_DDict* const ddict = (ZSTD_DDict*)sBuffer; - assert(sBuffer != NULL); - assert(dict != NULL); - if ((size_t)sBuffer & 7) return NULL; /* 8-aligned */ - if (sBufferSize < neededSpace) return NULL; - if (dictLoadMethod == ZSTD_dlm_byCopy) { - memcpy(ddict+1, dict, dictSize); /* local copy */ - dict = ddict+1; - } - if (ZSTD_isError( ZSTD_initDDict_internal(ddict, - dict, dictSize, - ZSTD_dlm_byRef, dictContentType) )) - return NULL; - return ddict; -} - - -size_t ZSTD_freeDDict(ZSTD_DDict* ddict) -{ - if (ddict==NULL) return 0; /* support free on NULL */ - { ZSTD_customMem const cMem = ddict->cMem; - ZSTD_free(ddict->dictBuffer, cMem); - ZSTD_free(ddict, cMem); - return 0; - } -} - -/*! ZSTD_estimateDDictSize() : - * Estimate amount of memory that will be needed to create a dictionary for decompression. - * Note : dictionary created by reference using ZSTD_dlm_byRef are smaller */ -size_t ZSTD_estimateDDictSize(size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod) -{ - return sizeof(ZSTD_DDict) + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize); -} - -size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict) -{ - if (ddict==NULL) return 0; /* support sizeof on NULL */ - return sizeof(*ddict) + (ddict->dictBuffer ? ddict->dictSize : 0) ; -} - -/*! ZSTD_getDictID_fromDDict() : - * Provides the dictID of the dictionary loaded into `ddict`. - * If @return == 0, the dictionary is not conformant to Zstandard specification, or empty. - * Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */ -unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict) -{ - if (ddict==NULL) return 0; - return ZSTD_getDictID_fromDict(ddict->dictContent, ddict->dictSize); -} - -} - - -// LICENSE_CHANGE_END - - -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #4 -// See the end of this file for a list - -/* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. - */ - - -/* *************************************************************** -* Tuning parameters -*****************************************************************/ -/*! - * HEAPMODE : - * Select how default decompression function ZSTD_decompress() allocates its context, - * on stack (0), or into heap (1, default; requires malloc()). - * Note that functions with explicit context such as ZSTD_decompressDCtx() are unaffected. - */ -#ifndef ZSTD_HEAPMODE -# define ZSTD_HEAPMODE 1 -#endif - -/*! -* LEGACY_SUPPORT : -* if set to 1+, ZSTD_decompress() can decode older formats (v0.1+) -*/ -#ifndef ZSTD_LEGACY_SUPPORT -# define ZSTD_LEGACY_SUPPORT 0 -#endif - -/*! - * MAXWINDOWSIZE_DEFAULT : - * maximum window size accepted by DStream __by default__. - * Frames requiring more memory will be rejected. - * It's possible to set a different limit using ZSTD_DCtx_setMaxWindowSize(). - */ -#ifndef ZSTD_MAXWINDOWSIZE_DEFAULT -# define ZSTD_MAXWINDOWSIZE_DEFAULT (((U32)1 << ZSTD_WINDOWLOG_LIMIT_DEFAULT) + 1) -#endif - -/*! - * NO_FORWARD_PROGRESS_MAX : - * maximum allowed nb of calls to ZSTD_decompressStream() - * without any forward progress - * (defined as: no byte read from input, and no byte flushed to output) - * before triggering an error. - */ -#ifndef ZSTD_NO_FORWARD_PROGRESS_MAX -# define ZSTD_NO_FORWARD_PROGRESS_MAX 16 -#endif - - -/*-******************************************************* -* Dependencies -*********************************************************/ -#include /* memcpy, memmove, memset */ - /* low level memory routines */ - - - - - /* blockProperties_t */ - /* ZSTD_DCtx */ - /* ZSTD_DDictDictContent */ - - -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #4 -// See the end of this file for a list - -/* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. - */ - - -#ifndef ZSTD_DEC_BLOCK_H -#define ZSTD_DEC_BLOCK_H - -/*-******************************************************* - * Dependencies - *********************************************************/ -#include /* size_t */ - /* DCtx, and some public functions */ - /* blockProperties_t, and some public functions */ - /* ZSTD_seqSymbol */ - -namespace duckdb_zstd { - -/* === Prototypes === */ - -/* note: prototypes already published within `zstd.h` : - * ZSTD_decompressBlock() - */ - -/* note: prototypes already published within `zstd_internal.h` : - * ZSTD_getcBlockSize() - * ZSTD_decodeSeqHeaders() - */ - - -/* ZSTD_decompressBlock_internal() : - * decompress block, starting at `src`, - * into destination buffer `dst`. - * @return : decompressed block size, - * or an error code (which can be tested using ZSTD_isError()) - */ -size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, const int frame); - -/* ZSTD_buildFSETable() : - * generate FSE decoding table for one symbol (ll, ml or off) - * this function must be called with valid parameters only - * (dt is large enough, normalizedCounter distribution total is a power of 2, max is within range, etc.) - * in which case it cannot fail. - * Internal use only. - */ -void ZSTD_buildFSETable(ZSTD_seqSymbol* dt, - const short* normalizedCounter, unsigned maxSymbolValue, - const U32* baseValue, const U32* nbAdditionalBits, - unsigned tableLog); - -} - -#endif /* ZSTD_DEC_BLOCK_H */ - - -// LICENSE_CHANGE_END - /* ZSTD_decompressBlock_internal */ - -// #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1) -// # include "../legacy/zstd_legacy.h" -// #endif -namespace duckdb_zstd { -const U32 ZSTDConstants::LL_base[MaxLL+1] = { - 0, 1, 2, 3, 4, 5, 6, 7, - 8, 9, 10, 11, 12, 13, 14, 15, - 16, 18, 20, 22, 24, 28, 32, 40, - 48, 64, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000, - 0x2000, 0x4000, 0x8000, 0x10000 }; - -const U32 ZSTDConstants::OF_base[MaxOff+1] = { - 0, 1, 1, 5, 0xD, 0x1D, 0x3D, 0x7D, - 0xFD, 0x1FD, 0x3FD, 0x7FD, 0xFFD, 0x1FFD, 0x3FFD, 0x7FFD, - 0xFFFD, 0x1FFFD, 0x3FFFD, 0x7FFFD, 0xFFFFD, 0x1FFFFD, 0x3FFFFD, 0x7FFFFD, - 0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD, 0x1FFFFFFD, 0x3FFFFFFD, 0x7FFFFFFD }; - -const U32 ZSTDConstants::OF_bits[MaxOff+1] = { - 0, 1, 2, 3, 4, 5, 6, 7, - 8, 9, 10, 11, 12, 13, 14, 15, - 16, 17, 18, 19, 20, 21, 22, 23, - 24, 25, 26, 27, 28, 29, 30, 31 }; - -const U32 ZSTDConstants::ML_base[MaxML+1] = { - 3, 4, 5, 6, 7, 8, 9, 10, - 11, 12, 13, 14, 15, 16, 17, 18, - 19, 20, 21, 22, 23, 24, 25, 26, - 27, 28, 29, 30, 31, 32, 33, 34, - 35, 37, 39, 41, 43, 47, 51, 59, - 67, 83, 99, 0x83, 0x103, 0x203, 0x403, 0x803, - 0x1003, 0x2003, 0x4003, 0x8003, 0x10003 }; - -const size_t ZSTDInternalConstants::ZSTD_blockHeaderSize = ZSTD_BLOCKHEADERSIZE; -const U32 ZSTDInternalConstants::LL_bits[MaxLL+1] = { 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 1, 1, 1, 1, 2, 2, 3, 3, - 4, 6, 7, 8, 9,10,11,12, - 13,14,15,16 }; -const S16 ZSTDInternalConstants::LL_defaultNorm[MaxLL+1] = { 4, 3, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 1, 1, 1, - 2, 2, 2, 2, 2, 2, 2, 2, - 2, 3, 2, 1, 1, 1, 1, 1, - -1,-1,-1,-1 }; -#define LL_DEFAULTNORMLOG 6 /* for static allocation */ -const U32 ZSTDInternalConstants::LL_defaultNormLog = LL_DEFAULTNORMLOG; -const U32 ZSTDInternalConstants::ML_bits[MaxML+1] = { 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 1, 1, 1, 1, 2, 2, 3, 3, - 4, 4, 5, 7, 8, 9,10,11, - 12,13,14,15,16 }; -const S16 ZSTDInternalConstants::ML_defaultNorm[MaxML+1] = { 1, 4, 3, 2, 2, 2, 2, 2, - 2, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1,-1,-1, - -1,-1,-1,-1,-1 }; -#define ML_DEFAULTNORMLOG 6 /* for static allocation */ -const U32 ZSTDInternalConstants::ML_defaultNormLog = ML_DEFAULTNORMLOG; - -const S16 ZSTDInternalConstants::OF_defaultNorm[DefaultMaxOff+1] = { 1, 1, 1, 1, 1, 1, 2, 2, - 2, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, - -1,-1,-1,-1,-1 }; -#define OF_DEFAULTNORMLOG 5 /* for static allocation */ -const U32 ZSTDInternalConstants::OF_defaultNormLog = OF_DEFAULTNORMLOG; -const U32 ZSTDInternalConstants::repStartValue[ZSTD_REP_NUM] = { 1, 4, 8 }; - -const ZSTD_customMem ZSTDInternalConstants::ZSTD_defaultCMem = { NULL, NULL, NULL }; /**< this constant defers to stdlib's functions */ - -/*-************************************************************* -* Context management -***************************************************************/ -size_t ZSTD_sizeof_DCtx (const ZSTD_DCtx* dctx) -{ - if (dctx==NULL) return 0; /* support sizeof NULL */ - return sizeof(*dctx) - + ZSTD_sizeof_DDict(dctx->ddictLocal) - + dctx->inBuffSize + dctx->outBuffSize; -} - -size_t ZSTD_estimateDCtxSize(void) { return sizeof(ZSTD_DCtx); } - - -static size_t ZSTD_startingInputLength(ZSTD_format_e format) -{ - size_t const startingInputLength = ZSTD_FRAMEHEADERSIZE_PREFIX(format); - /* only supports formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless */ - assert( (format == ZSTD_f_zstd1) || (format == ZSTD_f_zstd1_magicless) ); - return startingInputLength; -} - -static void ZSTD_initDCtx_internal(ZSTD_DCtx* dctx) -{ - dctx->format = ZSTD_f_zstd1; /* ZSTD_decompressBegin() invokes ZSTD_startingInputLength() with argument dctx->format */ - dctx->staticSize = 0; - dctx->maxWindowSize = ZSTD_MAXWINDOWSIZE_DEFAULT; - dctx->ddict = NULL; - dctx->ddictLocal = NULL; - dctx->dictEnd = NULL; - dctx->ddictIsCold = 0; - dctx->dictUses = ZSTD_dont_use; - dctx->inBuff = NULL; - dctx->inBuffSize = 0; - dctx->outBuffSize = 0; - dctx->streamStage = zdss_init; - dctx->legacyContext = NULL; - dctx->previousLegacyVersion = 0; - dctx->noForwardProgress = 0; - dctx->oversizedDuration = 0; - dctx->bmi2 = 0; - dctx->outBufferMode = ZSTD_obm_buffered; -#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION - dctx->dictContentEndForFuzzing = NULL; -#endif -} - -ZSTD_DCtx* ZSTD_initStaticDCtx(void *workspace, size_t workspaceSize) -{ - ZSTD_DCtx* const dctx = (ZSTD_DCtx*) workspace; - - if ((size_t)workspace & 7) return NULL; /* 8-aligned */ - if (workspaceSize < sizeof(ZSTD_DCtx)) return NULL; /* minimum size */ - - ZSTD_initDCtx_internal(dctx); - dctx->staticSize = workspaceSize; - dctx->inBuff = (char*)(dctx+1); - return dctx; -} - -ZSTD_DCtx* ZSTD_createDCtx_advanced(ZSTD_customMem customMem) -{ - if (!customMem.customAlloc ^ !customMem.customFree) return NULL; - - { ZSTD_DCtx* const dctx = (ZSTD_DCtx*)ZSTD_malloc(sizeof(*dctx), customMem); - if (!dctx) return NULL; - dctx->customMem = customMem; - ZSTD_initDCtx_internal(dctx); - return dctx; - } -} - -ZSTD_DCtx* ZSTD_createDCtx(void) -{ - DEBUGLOG(3, "ZSTD_createDCtx"); - return ZSTD_createDCtx_advanced(ZSTDInternalConstants::ZSTD_defaultCMem); -} - -static void ZSTD_clearDict(ZSTD_DCtx* dctx) -{ - ZSTD_freeDDict(dctx->ddictLocal); - dctx->ddictLocal = NULL; - dctx->ddict = NULL; - dctx->dictUses = ZSTD_dont_use; -} - -size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx) -{ - if (dctx==NULL) return 0; /* support free on NULL */ - RETURN_ERROR_IF(dctx->staticSize, memory_allocation, "not compatible with static DCtx"); - { ZSTD_customMem const cMem = dctx->customMem; - ZSTD_clearDict(dctx); - ZSTD_free(dctx->inBuff, cMem); - dctx->inBuff = NULL; -#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1) - if (dctx->legacyContext) - ZSTD_freeLegacyStreamContext(dctx->legacyContext, dctx->previousLegacyVersion); -#endif - ZSTD_free(dctx, cMem); - return 0; - } -} - -/* no longer useful */ -void ZSTD_copyDCtx(ZSTD_DCtx* dstDCtx, const ZSTD_DCtx* srcDCtx) -{ - size_t const toCopy = (size_t)((char*)(&dstDCtx->inBuff) - (char*)dstDCtx); - memcpy(dstDCtx, srcDCtx, toCopy); /* no need to copy workspace */ -} - - -/*-************************************************************* - * Frame header decoding - ***************************************************************/ - -/*! ZSTD_isFrame() : - * Tells if the content of `buffer` starts with a valid Frame Identifier. - * Note : Frame Identifier is 4 bytes. If `size < 4`, @return will always be 0. - * Note 2 : Legacy Frame Identifiers are considered valid only if Legacy Support is enabled. - * Note 3 : Skippable Frame Identifiers are considered valid. */ -unsigned ZSTD_isFrame(const void* buffer, size_t size) -{ - if (size < ZSTD_FRAMEIDSIZE) return 0; - { U32 const magic = MEM_readLE32(buffer); - if (magic == ZSTD_MAGICNUMBER) return 1; - if ((magic & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) return 1; - } -#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1) - if (ZSTD_isLegacy(buffer, size)) return 1; -#endif - return 0; -} - -static const size_t ZSTD_fcs_fieldSize[4] = { 0, 2, 4, 8 }; -static const size_t ZSTD_did_fieldSize[4] = { 0, 1, 2, 4 }; -/** ZSTD_frameHeaderSize_internal() : - * srcSize must be large enough to reach header size fields. - * note : only works for formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless. - * @return : size of the Frame Header - * or an error code, which can be tested with ZSTD_isError() */ -static size_t ZSTD_frameHeaderSize_internal(const void* src, size_t srcSize, ZSTD_format_e format) -{ - size_t const minInputSize = ZSTD_startingInputLength(format); - RETURN_ERROR_IF(srcSize < minInputSize, srcSize_wrong, ""); - - { BYTE const fhd = ((const BYTE*)src)[minInputSize-1]; - U32 const dictID= fhd & 3; - U32 const singleSegment = (fhd >> 5) & 1; - U32 const fcsId = fhd >> 6; - return minInputSize + !singleSegment - + ZSTD_did_fieldSize[dictID] + ZSTD_fcs_fieldSize[fcsId] - + (singleSegment && !fcsId); - } -} - -/** ZSTD_frameHeaderSize() : - * srcSize must be >= ZSTD_frameHeaderSize_prefix. - * @return : size of the Frame Header, - * or an error code (if srcSize is too small) */ -size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize) -{ - return ZSTD_frameHeaderSize_internal(src, srcSize, ZSTD_f_zstd1); -} - - -/** ZSTD_getFrameHeader_advanced() : - * decode Frame Header, or require larger `srcSize`. - * note : only works for formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless - * @return : 0, `zfhPtr` is correctly filled, - * >0, `srcSize` is too small, value is wanted `srcSize` amount, - * or an error code, which can be tested using ZSTD_isError() */ -size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize, ZSTD_format_e format) -{ - const BYTE* ip = (const BYTE*)src; - size_t const minInputSize = ZSTD_startingInputLength(format); - - memset(zfhPtr, 0, sizeof(*zfhPtr)); /* not strictly necessary, but static analyzer do not understand that zfhPtr is only going to be read only if return value is zero, since they are 2 different signals */ - if (srcSize < minInputSize) return minInputSize; - RETURN_ERROR_IF(src==NULL, GENERIC, "invalid parameter"); - - if ( (format != ZSTD_f_zstd1_magicless) - && (MEM_readLE32(src) != ZSTD_MAGICNUMBER) ) { - if ((MEM_readLE32(src) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { - /* skippable frame */ - if (srcSize < ZSTD_SKIPPABLEHEADERSIZE) - return ZSTD_SKIPPABLEHEADERSIZE; /* magic number + frame length */ - memset(zfhPtr, 0, sizeof(*zfhPtr)); - zfhPtr->frameContentSize = MEM_readLE32((const char *)src + ZSTD_FRAMEIDSIZE); - zfhPtr->frameType = ZSTD_skippableFrame; - return 0; - } - RETURN_ERROR(prefix_unknown, ""); - } - - /* ensure there is enough `srcSize` to fully read/decode frame header */ - { size_t const fhsize = ZSTD_frameHeaderSize_internal(src, srcSize, format); - if (srcSize < fhsize) return fhsize; - zfhPtr->headerSize = (U32)fhsize; - } - - { BYTE const fhdByte = ip[minInputSize-1]; - size_t pos = minInputSize; - U32 const dictIDSizeCode = fhdByte&3; - U32 const checksumFlag = (fhdByte>>2)&1; - U32 const singleSegment = (fhdByte>>5)&1; - U32 const fcsID = fhdByte>>6; - U64 windowSize = 0; - U32 dictID = 0; - U64 frameContentSize = ZSTD_CONTENTSIZE_UNKNOWN; - RETURN_ERROR_IF((fhdByte & 0x08) != 0, frameParameter_unsupported, - "reserved bits, must be zero"); - - if (!singleSegment) { - BYTE const wlByte = ip[pos++]; - U32 const windowLog = (wlByte >> 3) + ZSTD_WINDOWLOG_ABSOLUTEMIN; - RETURN_ERROR_IF(windowLog > ZSTD_WINDOWLOG_MAX, frameParameter_windowTooLarge, ""); - windowSize = (1ULL << windowLog); - windowSize += (windowSize >> 3) * (wlByte&7); - } - switch(dictIDSizeCode) - { - default: assert(0); /* impossible */ - case 0 : break; - case 1 : dictID = ip[pos]; pos++; break; - case 2 : dictID = MEM_readLE16(ip+pos); pos+=2; break; - case 3 : dictID = MEM_readLE32(ip+pos); pos+=4; break; - } - switch(fcsID) - { - default: assert(0); /* impossible */ - case 0 : if (singleSegment) frameContentSize = ip[pos]; break; - case 1 : frameContentSize = MEM_readLE16(ip+pos)+256; break; - case 2 : frameContentSize = MEM_readLE32(ip+pos); break; - case 3 : frameContentSize = MEM_readLE64(ip+pos); break; - } - if (singleSegment) windowSize = frameContentSize; - - zfhPtr->frameType = ZSTD_frame; - zfhPtr->frameContentSize = frameContentSize; - zfhPtr->windowSize = windowSize; - zfhPtr->blockSizeMax = (unsigned) MIN(windowSize, ZSTD_BLOCKSIZE_MAX); - zfhPtr->dictID = dictID; - zfhPtr->checksumFlag = checksumFlag; - } - return 0; -} - -/** ZSTD_getFrameHeader() : - * decode Frame Header, or require larger `srcSize`. - * note : this function does not consume input, it only reads it. - * @return : 0, `zfhPtr` is correctly filled, - * >0, `srcSize` is too small, value is wanted `srcSize` amount, - * or an error code, which can be tested using ZSTD_isError() */ -size_t ZSTD_getFrameHeader(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize) -{ - return ZSTD_getFrameHeader_advanced(zfhPtr, src, srcSize, ZSTD_f_zstd1); -} - - -/** ZSTD_getFrameContentSize() : - * compatible with legacy mode - * @return : decompressed size of the single frame pointed to be `src` if known, otherwise - * - ZSTD_CONTENTSIZE_UNKNOWN if the size cannot be determined - * - ZSTD_CONTENTSIZE_ERROR if an error occurred (e.g. invalid magic number, srcSize too small) */ -unsigned long long ZSTD_getFrameContentSize(const void *src, size_t srcSize) -{ -#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1) - if (ZSTD_isLegacy(src, srcSize)) { - unsigned long long const ret = ZSTD_getDecompressedSize_legacy(src, srcSize); - return ret == 0 ? ZSTD_CONTENTSIZE_UNKNOWN : ret; - } -#endif - { ZSTD_frameHeader zfh; - if (ZSTD_getFrameHeader(&zfh, src, srcSize) != 0) - return ZSTD_CONTENTSIZE_ERROR; - if (zfh.frameType == ZSTD_skippableFrame) { - return 0; - } else { - return zfh.frameContentSize; - } } -} - -static size_t readSkippableFrameSize(void const* src, size_t srcSize) -{ - size_t const skippableHeaderSize = ZSTD_SKIPPABLEHEADERSIZE; - U32 sizeU32; - - RETURN_ERROR_IF(srcSize < ZSTD_SKIPPABLEHEADERSIZE, srcSize_wrong, ""); - - sizeU32 = MEM_readLE32((BYTE const*)src + ZSTD_FRAMEIDSIZE); - RETURN_ERROR_IF((U32)(sizeU32 + ZSTD_SKIPPABLEHEADERSIZE) < sizeU32, - frameParameter_unsupported, ""); - { - size_t const skippableSize = skippableHeaderSize + sizeU32; - RETURN_ERROR_IF(skippableSize > srcSize, srcSize_wrong, ""); - return skippableSize; - } -} - -/** ZSTD_findDecompressedSize() : - * compatible with legacy mode - * `srcSize` must be the exact length of some number of ZSTD compressed and/or - * skippable frames - * @return : decompressed size of the frames contained */ -unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize) -{ - unsigned long long totalDstSize = 0; - - while (srcSize >= ZSTD_startingInputLength(ZSTD_f_zstd1)) { - U32 const magicNumber = MEM_readLE32(src); - - if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { - size_t const skippableSize = readSkippableFrameSize(src, srcSize); - if (ZSTD_isError(skippableSize)) { - return ZSTD_CONTENTSIZE_ERROR; - } - assert(skippableSize <= srcSize); - - src = (const BYTE *)src + skippableSize; - srcSize -= skippableSize; - continue; - } - - { unsigned long long const ret = ZSTD_getFrameContentSize(src, srcSize); - if (ret >= ZSTD_CONTENTSIZE_ERROR) return ret; - - /* check for overflow */ - if (totalDstSize + ret < totalDstSize) return ZSTD_CONTENTSIZE_ERROR; - totalDstSize += ret; - } - { size_t const frameSrcSize = ZSTD_findFrameCompressedSize(src, srcSize); - if (ZSTD_isError(frameSrcSize)) { - return ZSTD_CONTENTSIZE_ERROR; - } - - src = (const BYTE *)src + frameSrcSize; - srcSize -= frameSrcSize; - } - } /* while (srcSize >= ZSTD_frameHeaderSize_prefix) */ - - if (srcSize) return ZSTD_CONTENTSIZE_ERROR; - - return totalDstSize; -} - -/** ZSTD_getDecompressedSize() : - * compatible with legacy mode - * @return : decompressed size if known, 0 otherwise - note : 0 can mean any of the following : - - frame content is empty - - decompressed size field is not present in frame header - - frame header unknown / not supported - - frame header not complete (`srcSize` too small) */ -unsigned long long ZSTD_getDecompressedSize(const void* src, size_t srcSize) -{ - unsigned long long const ret = ZSTD_getFrameContentSize(src, srcSize); - ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_ERROR < ZSTD_CONTENTSIZE_UNKNOWN); - return (ret >= ZSTD_CONTENTSIZE_ERROR) ? 0 : ret; -} - - -/** ZSTD_decodeFrameHeader() : - * `headerSize` must be the size provided by ZSTD_frameHeaderSize(). - * @return : 0 if success, or an error code, which can be tested using ZSTD_isError() */ -static size_t ZSTD_decodeFrameHeader(ZSTD_DCtx* dctx, const void* src, size_t headerSize) -{ - size_t const result = ZSTD_getFrameHeader_advanced(&(dctx->fParams), src, headerSize, dctx->format); - if (ZSTD_isError(result)) return result; /* invalid header */ - RETURN_ERROR_IF(result>0, srcSize_wrong, "headerSize too small"); -#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION - /* Skip the dictID check in fuzzing mode, because it makes the search - * harder. - */ - RETURN_ERROR_IF(dctx->fParams.dictID && (dctx->dictID != dctx->fParams.dictID), - dictionary_wrong, ""); -#endif - if (dctx->fParams.checksumFlag) XXH64_reset(&dctx->xxhState, 0); - return 0; -} - -static ZSTD_frameSizeInfo ZSTD_errorFrameSizeInfo(size_t ret) -{ - ZSTD_frameSizeInfo frameSizeInfo; - frameSizeInfo.compressedSize = ret; - frameSizeInfo.decompressedBound = ZSTD_CONTENTSIZE_ERROR; - return frameSizeInfo; -} - -static ZSTD_frameSizeInfo ZSTD_findFrameSizeInfo(const void* src, size_t srcSize) -{ - ZSTD_frameSizeInfo frameSizeInfo; - memset(&frameSizeInfo, 0, sizeof(ZSTD_frameSizeInfo)); - -#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1) - if (ZSTD_isLegacy(src, srcSize)) - return ZSTD_findFrameSizeInfoLegacy(src, srcSize); -#endif - - if ((srcSize >= ZSTD_SKIPPABLEHEADERSIZE) - && (MEM_readLE32(src) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { - frameSizeInfo.compressedSize = readSkippableFrameSize(src, srcSize); - assert(ZSTD_isError(frameSizeInfo.compressedSize) || - frameSizeInfo.compressedSize <= srcSize); - return frameSizeInfo; - } else { - const BYTE* ip = (const BYTE*)src; - const BYTE* const ipstart = ip; - size_t remainingSize = srcSize; - size_t nbBlocks = 0; - ZSTD_frameHeader zfh; - - /* Extract Frame Header */ - { size_t const ret = ZSTD_getFrameHeader(&zfh, src, srcSize); - if (ZSTD_isError(ret)) - return ZSTD_errorFrameSizeInfo(ret); - if (ret > 0) - return ZSTD_errorFrameSizeInfo(ERROR(srcSize_wrong)); - } - - ip += zfh.headerSize; - remainingSize -= zfh.headerSize; - - /* Iterate over each block */ - while (1) { - blockProperties_t blockProperties; - size_t const cBlockSize = ZSTD_getcBlockSize(ip, remainingSize, &blockProperties); - if (ZSTD_isError(cBlockSize)) - return ZSTD_errorFrameSizeInfo(cBlockSize); - - if (ZSTDInternalConstants::ZSTD_blockHeaderSize + cBlockSize > remainingSize) - return ZSTD_errorFrameSizeInfo(ERROR(srcSize_wrong)); - - ip += ZSTDInternalConstants::ZSTD_blockHeaderSize + cBlockSize; - remainingSize -= ZSTDInternalConstants::ZSTD_blockHeaderSize + cBlockSize; - nbBlocks++; - - if (blockProperties.lastBlock) break; - } - - /* Final frame content checksum */ - if (zfh.checksumFlag) { - if (remainingSize < 4) - return ZSTD_errorFrameSizeInfo(ERROR(srcSize_wrong)); - ip += 4; - } - - frameSizeInfo.compressedSize = ip - ipstart; - frameSizeInfo.decompressedBound = (zfh.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN) - ? zfh.frameContentSize - : nbBlocks * zfh.blockSizeMax; - return frameSizeInfo; - } -} - -/** ZSTD_findFrameCompressedSize() : - * compatible with legacy mode - * `src` must point to the start of a ZSTD frame, ZSTD legacy frame, or skippable frame - * `srcSize` must be at least as large as the frame contained - * @return : the compressed size of the frame starting at `src` */ -size_t ZSTD_findFrameCompressedSize(const void *src, size_t srcSize) -{ - ZSTD_frameSizeInfo const frameSizeInfo = ZSTD_findFrameSizeInfo(src, srcSize); - return frameSizeInfo.compressedSize; -} - -/** ZSTD_decompressBound() : - * compatible with legacy mode - * `src` must point to the start of a ZSTD frame or a skippeable frame - * `srcSize` must be at least as large as the frame contained - * @return : the maximum decompressed size of the compressed source - */ -unsigned long long ZSTD_decompressBound(const void* src, size_t srcSize) -{ - unsigned long long bound = 0; - /* Iterate over each frame */ - while (srcSize > 0) { - ZSTD_frameSizeInfo const frameSizeInfo = ZSTD_findFrameSizeInfo(src, srcSize); - size_t const compressedSize = frameSizeInfo.compressedSize; - unsigned long long const decompressedBound = frameSizeInfo.decompressedBound; - if (ZSTD_isError(compressedSize) || decompressedBound == ZSTD_CONTENTSIZE_ERROR) - return ZSTD_CONTENTSIZE_ERROR; - assert(srcSize >= compressedSize); - src = (const BYTE*)src + compressedSize; - srcSize -= compressedSize; - bound += decompressedBound; - } - return bound; -} - - -/*-************************************************************* - * Frame decoding - ***************************************************************/ - -/** ZSTD_insertBlock() : - * insert `src` block into `dctx` history. Useful to track uncompressed blocks. */ -size_t ZSTD_insertBlock(ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize) -{ - DEBUGLOG(5, "ZSTD_insertBlock: %u bytes", (unsigned)blockSize); - ZSTD_checkContinuity(dctx, blockStart); - dctx->previousDstEnd = (const char*)blockStart + blockSize; - return blockSize; -} - - -static size_t ZSTD_copyRawBlock(void* dst, size_t dstCapacity, - const void* src, size_t srcSize) -{ - DEBUGLOG(5, "ZSTD_copyRawBlock"); - if (dst == NULL) { - if (srcSize == 0) return 0; - RETURN_ERROR(dstBuffer_null, ""); - } - RETURN_ERROR_IF(srcSize > dstCapacity, dstSize_tooSmall, ""); - memcpy(dst, src, srcSize); - return srcSize; -} - -static size_t ZSTD_setRleBlock(void* dst, size_t dstCapacity, - BYTE b, - size_t regenSize) -{ - if (dst == NULL) { - if (regenSize == 0) return 0; - RETURN_ERROR(dstBuffer_null, ""); - } - RETURN_ERROR_IF(regenSize > dstCapacity, dstSize_tooSmall, ""); - memset(dst, b, regenSize); - return regenSize; -} - - -/*! ZSTD_decompressFrame() : - * @dctx must be properly initialized - * will update *srcPtr and *srcSizePtr, - * to make *srcPtr progress by one frame. */ -static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx, - void* dst, size_t dstCapacity, - const void** srcPtr, size_t *srcSizePtr) -{ - const BYTE* ip = (const BYTE*)(*srcPtr); - BYTE* const ostart = (BYTE* const)dst; - BYTE* const oend = dstCapacity != 0 ? ostart + dstCapacity : ostart; - BYTE* op = ostart; - size_t remainingSrcSize = *srcSizePtr; - - DEBUGLOG(4, "ZSTD_decompressFrame (srcSize:%i)", (int)*srcSizePtr); - - /* check */ - RETURN_ERROR_IF( - remainingSrcSize < ZSTD_FRAMEHEADERSIZE_MIN(dctx->format)+ZSTDInternalConstants::ZSTD_blockHeaderSize, - srcSize_wrong, ""); - - /* Frame Header */ - { size_t const frameHeaderSize = ZSTD_frameHeaderSize_internal( - ip, ZSTD_FRAMEHEADERSIZE_PREFIX(dctx->format), dctx->format); - if (ZSTD_isError(frameHeaderSize)) return frameHeaderSize; - RETURN_ERROR_IF(remainingSrcSize < frameHeaderSize+ZSTDInternalConstants::ZSTD_blockHeaderSize, - srcSize_wrong, ""); - FORWARD_IF_ERROR( ZSTD_decodeFrameHeader(dctx, ip, frameHeaderSize) , ""); - ip += frameHeaderSize; remainingSrcSize -= frameHeaderSize; - } - - /* Loop on each block */ - while (1) { - size_t decodedSize; - blockProperties_t blockProperties; - size_t const cBlockSize = ZSTD_getcBlockSize(ip, remainingSrcSize, &blockProperties); - if (ZSTD_isError(cBlockSize)) return cBlockSize; - - ip += ZSTDInternalConstants::ZSTD_blockHeaderSize; - remainingSrcSize -= ZSTDInternalConstants::ZSTD_blockHeaderSize; - RETURN_ERROR_IF(cBlockSize > remainingSrcSize, srcSize_wrong, ""); - - switch(blockProperties.blockType) - { - case bt_compressed: - decodedSize = ZSTD_decompressBlock_internal(dctx, op, oend-op, ip, cBlockSize, /* frame */ 1); - break; - case bt_raw : - decodedSize = ZSTD_copyRawBlock(op, oend-op, ip, cBlockSize); - break; - case bt_rle : - decodedSize = ZSTD_setRleBlock(op, oend-op, *ip, blockProperties.origSize); - break; - case bt_reserved : - default: - RETURN_ERROR(corruption_detected, "invalid block type"); - } - - if (ZSTD_isError(decodedSize)) return decodedSize; - if (dctx->fParams.checksumFlag) - XXH64_update(&dctx->xxhState, op, decodedSize); - if (decodedSize != 0) - op += decodedSize; - assert(ip != NULL); - ip += cBlockSize; - remainingSrcSize -= cBlockSize; - if (blockProperties.lastBlock) break; - } - - if (dctx->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN) { - RETURN_ERROR_IF((U64)(op-ostart) != dctx->fParams.frameContentSize, - corruption_detected, ""); - } - if (dctx->fParams.checksumFlag) { /* Frame content checksum verification */ - U32 const checkCalc = (U32)XXH64_digest(&dctx->xxhState); - U32 checkRead; - RETURN_ERROR_IF(remainingSrcSize<4, checksum_wrong, ""); - checkRead = MEM_readLE32(ip); - RETURN_ERROR_IF(checkRead != checkCalc, checksum_wrong, ""); - ip += 4; - remainingSrcSize -= 4; - } - - /* Allow caller to get size read */ - *srcPtr = ip; - *srcSizePtr = remainingSrcSize; - return op-ostart; -} - -static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - const void* dict, size_t dictSize, - const ZSTD_DDict* ddict) -{ - void* const dststart = dst; - int moreThan1Frame = 0; - - DEBUGLOG(5, "ZSTD_decompressMultiFrame"); - assert(dict==NULL || ddict==NULL); /* either dict or ddict set, not both */ - - if (ddict) { - dict = ZSTD_DDict_dictContent(ddict); - dictSize = ZSTD_DDict_dictSize(ddict); - } - - while (srcSize >= ZSTD_startingInputLength(dctx->format)) { - -#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1) - if (ZSTD_isLegacy(src, srcSize)) { - size_t decodedSize; - size_t const frameSize = ZSTD_findFrameCompressedSizeLegacy(src, srcSize); - if (ZSTD_isError(frameSize)) return frameSize; - RETURN_ERROR_IF(dctx->staticSize, memory_allocation, - "legacy support is not compatible with static dctx"); - - decodedSize = ZSTD_decompressLegacy(dst, dstCapacity, src, frameSize, dict, dictSize); - if (ZSTD_isError(decodedSize)) return decodedSize; - - assert(decodedSize <=- dstCapacity); - dst = (BYTE*)dst + decodedSize; - dstCapacity -= decodedSize; - - src = (const BYTE*)src + frameSize; - srcSize -= frameSize; - - continue; - } -#endif - - { U32 const magicNumber = MEM_readLE32(src); - DEBUGLOG(4, "reading magic number %08X (expecting %08X)", - (unsigned)magicNumber, ZSTD_MAGICNUMBER); - if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { - size_t const skippableSize = readSkippableFrameSize(src, srcSize); - FORWARD_IF_ERROR(skippableSize, "readSkippableFrameSize failed"); - assert(skippableSize <= srcSize); - - src = (const BYTE *)src + skippableSize; - srcSize -= skippableSize; - continue; - } } - - if (ddict) { - /* we were called from ZSTD_decompress_usingDDict */ - FORWARD_IF_ERROR(ZSTD_decompressBegin_usingDDict(dctx, ddict), ""); - } else { - /* this will initialize correctly with no dict if dict == NULL, so - * use this in all cases but ddict */ - FORWARD_IF_ERROR(ZSTD_decompressBegin_usingDict(dctx, dict, dictSize), ""); - } - ZSTD_checkContinuity(dctx, dst); - - { const size_t res = ZSTD_decompressFrame(dctx, dst, dstCapacity, - &src, &srcSize); - RETURN_ERROR_IF( - (ZSTD_getErrorCode(res) == ZSTD_error_prefix_unknown) - && (moreThan1Frame==1), - srcSize_wrong, - "at least one frame successfully completed, but following " - "bytes are garbage: it's more likely to be a srcSize error, " - "specifying more bytes than compressed size of frame(s). This " - "error message replaces ERROR(prefix_unknown), which would be " - "confusing, as the first header is actually correct. Note that " - "one could be unlucky, it might be a corruption error instead, " - "happening right at the place where we expect zstd magic " - "bytes. But this is _much_ less likely than a srcSize field " - "error."); - if (ZSTD_isError(res)) return res; - assert(res <= dstCapacity); - if (res != 0) - dst = (BYTE*)dst + res; - dstCapacity -= res; - } - moreThan1Frame = 1; - } /* while (srcSize >= ZSTD_frameHeaderSize_prefix) */ - - RETURN_ERROR_IF(srcSize, srcSize_wrong, "input not entirely consumed"); - - return (BYTE*)dst - (BYTE*)dststart; -} - -size_t ZSTD_decompress_usingDict(ZSTD_DCtx* dctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - const void* dict, size_t dictSize) -{ - return ZSTD_decompressMultiFrame(dctx, dst, dstCapacity, src, srcSize, dict, dictSize, NULL); -} - - -static ZSTD_DDict const* ZSTD_getDDict(ZSTD_DCtx* dctx) -{ - switch (dctx->dictUses) { - default: - assert(0 /* Impossible */); - /* fall-through */ - case ZSTD_dont_use: - ZSTD_clearDict(dctx); - return NULL; - case ZSTD_use_indefinitely: - return dctx->ddict; - case ZSTD_use_once: - dctx->dictUses = ZSTD_dont_use; - return dctx->ddict; - } -} - -size_t ZSTD_decompressDCtx(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize) -{ - return ZSTD_decompress_usingDDict(dctx, dst, dstCapacity, src, srcSize, ZSTD_getDDict(dctx)); -} - - -size_t ZSTD_decompress(void* dst, size_t dstCapacity, const void* src, size_t srcSize) -{ -#if defined(ZSTD_HEAPMODE) && (ZSTD_HEAPMODE>=1) - size_t regenSize; - ZSTD_DCtx* const dctx = ZSTD_createDCtx(); - RETURN_ERROR_IF(dctx==NULL, memory_allocation, "NULL pointer!"); - regenSize = ZSTD_decompressDCtx(dctx, dst, dstCapacity, src, srcSize); - ZSTD_freeDCtx(dctx); - return regenSize; -#else /* stack mode */ - ZSTD_DCtx dctx; - ZSTD_initDCtx_internal(&dctx); - return ZSTD_decompressDCtx(&dctx, dst, dstCapacity, src, srcSize); -#endif -} - - -/*-************************************** -* Advanced Streaming Decompression API -* Bufferless and synchronous -****************************************/ -size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx) { return dctx->expected; } - -/** - * Similar to ZSTD_nextSrcSizeToDecompress(), but when when a block input can be streamed, - * we allow taking a partial block as the input. Currently only raw uncompressed blocks can - * be streamed. - * - * For blocks that can be streamed, this allows us to reduce the latency until we produce - * output, and avoid copying the input. - * - * @param inputSize - The total amount of input that the caller currently has. - */ -static size_t ZSTD_nextSrcSizeToDecompressWithInputSize(ZSTD_DCtx* dctx, size_t inputSize) { - if (!(dctx->stage == ZSTDds_decompressBlock || dctx->stage == ZSTDds_decompressLastBlock)) - return dctx->expected; - if (dctx->bType != bt_raw) - return dctx->expected; - return MIN(MAX(inputSize, 1), dctx->expected); -} - -ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx) { - switch(dctx->stage) - { - default: /* should not happen */ - assert(0); - case ZSTDds_getFrameHeaderSize: - case ZSTDds_decodeFrameHeader: - return ZSTDnit_frameHeader; - case ZSTDds_decodeBlockHeader: - return ZSTDnit_blockHeader; - case ZSTDds_decompressBlock: - return ZSTDnit_block; - case ZSTDds_decompressLastBlock: - return ZSTDnit_lastBlock; - case ZSTDds_checkChecksum: - return ZSTDnit_checksum; - case ZSTDds_decodeSkippableHeader: - case ZSTDds_skipFrame: - return ZSTDnit_skippableFrame; - } -} - -static int ZSTD_isSkipFrame(ZSTD_DCtx* dctx) { return dctx->stage == ZSTDds_skipFrame; } - -/** ZSTD_decompressContinue() : - * srcSize : must be the exact nb of bytes expected (see ZSTD_nextSrcSizeToDecompress()) - * @return : nb of bytes generated into `dst` (necessarily <= `dstCapacity) - * or an error code, which can be tested using ZSTD_isError() */ -size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize) -{ - DEBUGLOG(5, "ZSTD_decompressContinue (srcSize:%u)", (unsigned)srcSize); - /* Sanity check */ - RETURN_ERROR_IF(srcSize != ZSTD_nextSrcSizeToDecompressWithInputSize(dctx, srcSize), srcSize_wrong, "not allowed"); - if (dstCapacity) ZSTD_checkContinuity(dctx, dst); - - switch (dctx->stage) - { - case ZSTDds_getFrameHeaderSize : - assert(src != NULL); - if (dctx->format == ZSTD_f_zstd1) { /* allows header */ - assert(srcSize >= ZSTD_FRAMEIDSIZE); /* to read skippable magic number */ - if ((MEM_readLE32(src) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { /* skippable frame */ - memcpy(dctx->headerBuffer, src, srcSize); - dctx->expected = ZSTD_SKIPPABLEHEADERSIZE - srcSize; /* remaining to load to get full skippable frame header */ - dctx->stage = ZSTDds_decodeSkippableHeader; - return 0; - } } - dctx->headerSize = ZSTD_frameHeaderSize_internal(src, srcSize, dctx->format); - if (ZSTD_isError(dctx->headerSize)) return dctx->headerSize; - memcpy(dctx->headerBuffer, src, srcSize); - dctx->expected = dctx->headerSize - srcSize; - dctx->stage = ZSTDds_decodeFrameHeader; - return 0; - - case ZSTDds_decodeFrameHeader: - assert(src != NULL); - memcpy(dctx->headerBuffer + (dctx->headerSize - srcSize), src, srcSize); - FORWARD_IF_ERROR(ZSTD_decodeFrameHeader(dctx, dctx->headerBuffer, dctx->headerSize), ""); - dctx->expected = ZSTDInternalConstants::ZSTD_blockHeaderSize; - dctx->stage = ZSTDds_decodeBlockHeader; - return 0; - - case ZSTDds_decodeBlockHeader: - { blockProperties_t bp; - size_t const cBlockSize = ZSTD_getcBlockSize(src, ZSTDInternalConstants::ZSTD_blockHeaderSize, &bp); - if (ZSTD_isError(cBlockSize)) return cBlockSize; - RETURN_ERROR_IF(cBlockSize > dctx->fParams.blockSizeMax, corruption_detected, "Block Size Exceeds Maximum"); - dctx->expected = cBlockSize; - dctx->bType = bp.blockType; - dctx->rleSize = bp.origSize; - if (cBlockSize) { - dctx->stage = bp.lastBlock ? ZSTDds_decompressLastBlock : ZSTDds_decompressBlock; - return 0; - } - /* empty block */ - if (bp.lastBlock) { - if (dctx->fParams.checksumFlag) { - dctx->expected = 4; - dctx->stage = ZSTDds_checkChecksum; - } else { - dctx->expected = 0; /* end of frame */ - dctx->stage = ZSTDds_getFrameHeaderSize; - } - } else { - dctx->expected = ZSTDInternalConstants::ZSTD_blockHeaderSize; /* jump to next header */ - dctx->stage = ZSTDds_decodeBlockHeader; - } - return 0; - } - - case ZSTDds_decompressLastBlock: - case ZSTDds_decompressBlock: - DEBUGLOG(5, "ZSTD_decompressContinue: case ZSTDds_decompressBlock"); - { size_t rSize; - switch(dctx->bType) - { - case bt_compressed: - DEBUGLOG(5, "ZSTD_decompressContinue: case bt_compressed"); - rSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, /* frame */ 1); - dctx->expected = 0; /* Streaming not supported */ - break; - case bt_raw : - assert(srcSize <= dctx->expected); - rSize = ZSTD_copyRawBlock(dst, dstCapacity, src, srcSize); - FORWARD_IF_ERROR(rSize, "ZSTD_copyRawBlock failed"); - assert(rSize == srcSize); - dctx->expected -= rSize; - break; - case bt_rle : - rSize = ZSTD_setRleBlock(dst, dstCapacity, *(const BYTE*)src, dctx->rleSize); - dctx->expected = 0; /* Streaming not supported */ - break; - case bt_reserved : /* should never happen */ - default: - RETURN_ERROR(corruption_detected, "invalid block type"); - } - FORWARD_IF_ERROR(rSize, ""); - RETURN_ERROR_IF(rSize > dctx->fParams.blockSizeMax, corruption_detected, "Decompressed Block Size Exceeds Maximum"); - DEBUGLOG(5, "ZSTD_decompressContinue: decoded size from block : %u", (unsigned)rSize); - dctx->decodedSize += rSize; - if (dctx->fParams.checksumFlag) XXH64_update(&dctx->xxhState, dst, rSize); - dctx->previousDstEnd = (char*)dst + rSize; - - /* Stay on the same stage until we are finished streaming the block. */ - if (dctx->expected > 0) { - return rSize; - } - - if (dctx->stage == ZSTDds_decompressLastBlock) { /* end of frame */ - DEBUGLOG(4, "ZSTD_decompressContinue: decoded size from frame : %u", (unsigned)dctx->decodedSize); - RETURN_ERROR_IF( - dctx->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN - && dctx->decodedSize != dctx->fParams.frameContentSize, - corruption_detected, ""); - if (dctx->fParams.checksumFlag) { /* another round for frame checksum */ - dctx->expected = 4; - dctx->stage = ZSTDds_checkChecksum; - } else { - dctx->expected = 0; /* ends here */ - dctx->stage = ZSTDds_getFrameHeaderSize; - } - } else { - dctx->stage = ZSTDds_decodeBlockHeader; - dctx->expected = ZSTDInternalConstants::ZSTD_blockHeaderSize; - } - return rSize; - } - - case ZSTDds_checkChecksum: - assert(srcSize == 4); /* guaranteed by dctx->expected */ - { U32 const h32 = (U32)XXH64_digest(&dctx->xxhState); - U32 const check32 = MEM_readLE32(src); - DEBUGLOG(4, "ZSTD_decompressContinue: checksum : calculated %08X :: %08X read", (unsigned)h32, (unsigned)check32); - RETURN_ERROR_IF(check32 != h32, checksum_wrong, ""); - dctx->expected = 0; - dctx->stage = ZSTDds_getFrameHeaderSize; - return 0; - } - - case ZSTDds_decodeSkippableHeader: - assert(src != NULL); - assert(srcSize <= ZSTD_SKIPPABLEHEADERSIZE); - memcpy(dctx->headerBuffer + (ZSTD_SKIPPABLEHEADERSIZE - srcSize), src, srcSize); /* complete skippable header */ - dctx->expected = MEM_readLE32(dctx->headerBuffer + ZSTD_FRAMEIDSIZE); /* note : dctx->expected can grow seriously large, beyond local buffer size */ - dctx->stage = ZSTDds_skipFrame; - return 0; - - case ZSTDds_skipFrame: - dctx->expected = 0; - dctx->stage = ZSTDds_getFrameHeaderSize; - return 0; - - default: - assert(0); /* impossible */ - RETURN_ERROR(GENERIC, "impossible to reach"); /* some compiler require default to do something */ - } -} - - -static size_t ZSTD_refDictContent(ZSTD_DCtx* dctx, const void* dict, size_t dictSize) -{ - dctx->dictEnd = dctx->previousDstEnd; - dctx->virtualStart = (const char*)dict - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->prefixStart)); - dctx->prefixStart = dict; - dctx->previousDstEnd = (const char*)dict + dictSize; -#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION - dctx->dictContentBeginForFuzzing = dctx->prefixStart; - dctx->dictContentEndForFuzzing = dctx->previousDstEnd; -#endif - return 0; -} - -/*! ZSTD_loadDEntropy() : - * dict : must point at beginning of a valid zstd dictionary. - * @return : size of entropy tables read */ -size_t -ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy, - const void* const dict, size_t const dictSize) -{ - const BYTE* dictPtr = (const BYTE*)dict; - const BYTE* const dictEnd = dictPtr + dictSize; - - RETURN_ERROR_IF(dictSize <= 8, dictionary_corrupted, "dict is too small"); - assert(MEM_readLE32(dict) == ZSTD_MAGIC_DICTIONARY); /* dict must be valid */ - dictPtr += 8; /* skip header = magic + dictID */ - - ZSTD_STATIC_ASSERT(offsetof(ZSTD_entropyDTables_t, OFTable) == offsetof(ZSTD_entropyDTables_t, LLTable) + sizeof(entropy->LLTable)); - ZSTD_STATIC_ASSERT(offsetof(ZSTD_entropyDTables_t, MLTable) == offsetof(ZSTD_entropyDTables_t, OFTable) + sizeof(entropy->OFTable)); - ZSTD_STATIC_ASSERT(sizeof(entropy->LLTable) + sizeof(entropy->OFTable) + sizeof(entropy->MLTable) >= HUF_DECOMPRESS_WORKSPACE_SIZE); - { void* const workspace = &entropy->LLTable; /* use fse tables as temporary workspace; implies fse tables are grouped together */ - size_t const workspaceSize = sizeof(entropy->LLTable) + sizeof(entropy->OFTable) + sizeof(entropy->MLTable); -#ifdef HUF_FORCE_DECOMPRESS_X1 - /* in minimal huffman, we always use X1 variants */ - size_t const hSize = HUF_readDTableX1_wksp(entropy->hufTable, - dictPtr, dictEnd - dictPtr, - workspace, workspaceSize); -#else - size_t const hSize = HUF_readDTableX2_wksp(entropy->hufTable, - dictPtr, dictEnd - dictPtr, - workspace, workspaceSize); -#endif - RETURN_ERROR_IF(HUF_isError(hSize), dictionary_corrupted, ""); - dictPtr += hSize; - } - - { short offcodeNCount[MaxOff+1]; - unsigned offcodeMaxValue = MaxOff, offcodeLog; - size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, dictEnd-dictPtr); - RETURN_ERROR_IF(FSE_isError(offcodeHeaderSize), dictionary_corrupted, ""); - RETURN_ERROR_IF(offcodeMaxValue > MaxOff, dictionary_corrupted, ""); - RETURN_ERROR_IF(offcodeLog > OffFSELog, dictionary_corrupted, ""); - ZSTD_buildFSETable( entropy->OFTable, - offcodeNCount, offcodeMaxValue, - ZSTDConstants::OF_base, ZSTDConstants::OF_bits, - offcodeLog); - dictPtr += offcodeHeaderSize; - } - - { short matchlengthNCount[MaxML+1]; - unsigned matchlengthMaxValue = MaxML, matchlengthLog; - size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, dictEnd-dictPtr); - RETURN_ERROR_IF(FSE_isError(matchlengthHeaderSize), dictionary_corrupted, ""); - RETURN_ERROR_IF(matchlengthMaxValue > MaxML, dictionary_corrupted, ""); - RETURN_ERROR_IF(matchlengthLog > MLFSELog, dictionary_corrupted, ""); - ZSTD_buildFSETable( entropy->MLTable, - matchlengthNCount, matchlengthMaxValue, - ZSTDConstants::ML_base, ZSTDInternalConstants::ML_bits, - matchlengthLog); - dictPtr += matchlengthHeaderSize; - } - - { short litlengthNCount[MaxLL+1]; - unsigned litlengthMaxValue = MaxLL, litlengthLog; - size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, dictEnd-dictPtr); - RETURN_ERROR_IF(FSE_isError(litlengthHeaderSize), dictionary_corrupted, ""); - RETURN_ERROR_IF(litlengthMaxValue > MaxLL, dictionary_corrupted, ""); - RETURN_ERROR_IF(litlengthLog > LLFSELog, dictionary_corrupted, ""); - ZSTD_buildFSETable( entropy->LLTable, - litlengthNCount, litlengthMaxValue, - ZSTDConstants::LL_base, ZSTDInternalConstants::LL_bits, - litlengthLog); - dictPtr += litlengthHeaderSize; - } - - RETURN_ERROR_IF(dictPtr+12 > dictEnd, dictionary_corrupted, ""); - { int i; - size_t const dictContentSize = (size_t)(dictEnd - (dictPtr+12)); - for (i=0; i<3; i++) { - U32 const rep = MEM_readLE32(dictPtr); dictPtr += 4; - RETURN_ERROR_IF(rep==0 || rep > dictContentSize, - dictionary_corrupted, ""); - entropy->rep[i] = rep; - } } - - return dictPtr - (const BYTE*)dict; -} - -static size_t ZSTD_decompress_insertDictionary(ZSTD_DCtx* dctx, const void* dict, size_t dictSize) -{ - if (dictSize < 8) return ZSTD_refDictContent(dctx, dict, dictSize); - { U32 const magic = MEM_readLE32(dict); - if (magic != ZSTD_MAGIC_DICTIONARY) { - return ZSTD_refDictContent(dctx, dict, dictSize); /* pure content mode */ - } } - dctx->dictID = MEM_readLE32((const char*)dict + ZSTD_FRAMEIDSIZE); - - /* load entropy tables */ - { size_t const eSize = ZSTD_loadDEntropy(&dctx->entropy, dict, dictSize); - RETURN_ERROR_IF(ZSTD_isError(eSize), dictionary_corrupted, ""); - dict = (const char*)dict + eSize; - dictSize -= eSize; - } - dctx->litEntropy = dctx->fseEntropy = 1; - - /* reference dictionary content */ - return ZSTD_refDictContent(dctx, dict, dictSize); -} - -static const U32 repStartValue[ZSTD_REP_NUM] = { 1, 4, 8 }; - -size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx) -{ - assert(dctx != NULL); - dctx->expected = ZSTD_startingInputLength(dctx->format); /* dctx->format must be properly set */ - dctx->stage = ZSTDds_getFrameHeaderSize; - dctx->decodedSize = 0; - dctx->previousDstEnd = NULL; - dctx->prefixStart = NULL; - dctx->virtualStart = NULL; - dctx->dictEnd = NULL; - dctx->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001); /* cover both little and big endian */ - dctx->litEntropy = dctx->fseEntropy = 0; - dctx->dictID = 0; - dctx->bType = bt_reserved; - ZSTD_STATIC_ASSERT(sizeof(dctx->entropy.rep) == sizeof(repStartValue)); - memcpy(dctx->entropy.rep, repStartValue, sizeof(repStartValue)); /* initial repcodes */ - dctx->LLTptr = dctx->entropy.LLTable; - dctx->MLTptr = dctx->entropy.MLTable; - dctx->OFTptr = dctx->entropy.OFTable; - dctx->HUFptr = dctx->entropy.hufTable; - return 0; -} - -size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx* dctx, const void* dict, size_t dictSize) -{ - FORWARD_IF_ERROR( ZSTD_decompressBegin(dctx) , ""); - if (dict && dictSize) - RETURN_ERROR_IF( - ZSTD_isError(ZSTD_decompress_insertDictionary(dctx, dict, dictSize)), - dictionary_corrupted, ""); - return 0; -} - - -/* ====== ZSTD_DDict ====== */ - -size_t ZSTD_decompressBegin_usingDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict) -{ - DEBUGLOG(4, "ZSTD_decompressBegin_usingDDict"); - assert(dctx != NULL); - if (ddict) { - const char* const dictStart = (const char*)ZSTD_DDict_dictContent(ddict); - size_t const dictSize = ZSTD_DDict_dictSize(ddict); - const void* const dictEnd = dictStart + dictSize; - dctx->ddictIsCold = (dctx->dictEnd != dictEnd); - DEBUGLOG(4, "DDict is %s", - dctx->ddictIsCold ? "~cold~" : "hot!"); - } - FORWARD_IF_ERROR( ZSTD_decompressBegin(dctx) , ""); - if (ddict) { /* NULL ddict is equivalent to no dictionary */ - ZSTD_copyDDictParameters(dctx, ddict); - } - return 0; -} - -/*! ZSTD_getDictID_fromDict() : - * Provides the dictID stored within dictionary. - * if @return == 0, the dictionary is not conformant with Zstandard specification. - * It can still be loaded, but as a content-only dictionary. */ -unsigned ZSTD_getDictID_fromDict(const void* dict, size_t dictSize) -{ - if (dictSize < 8) return 0; - if (MEM_readLE32(dict) != ZSTD_MAGIC_DICTIONARY) return 0; - return MEM_readLE32((const char*)dict + ZSTD_FRAMEIDSIZE); -} - -/*! ZSTD_getDictID_fromFrame() : - * Provides the dictID required to decompress frame stored within `src`. - * If @return == 0, the dictID could not be decoded. - * This could for one of the following reasons : - * - The frame does not require a dictionary (most common case). - * - The frame was built with dictID intentionally removed. - * Needed dictionary is a hidden information. - * Note : this use case also happens when using a non-conformant dictionary. - * - `srcSize` is too small, and as a result, frame header could not be decoded. - * Note : possible if `srcSize < ZSTD_FRAMEHEADERSIZE_MAX`. - * - This is not a Zstandard frame. - * When identifying the exact failure cause, it's possible to use - * ZSTD_getFrameHeader(), which will provide a more precise error code. */ -unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize) -{ - ZSTD_frameHeader zfp = { 0, 0, 0, ZSTD_frame, 0, 0, 0 }; - size_t const hError = ZSTD_getFrameHeader(&zfp, src, srcSize); - if (ZSTD_isError(hError)) return 0; - return zfp.dictID; -} - - -/*! ZSTD_decompress_usingDDict() : -* Decompression using a pre-digested Dictionary -* Use dictionary without significant overhead. */ -size_t ZSTD_decompress_usingDDict(ZSTD_DCtx* dctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - const ZSTD_DDict* ddict) -{ - /* pass content and size in case legacy frames are encountered */ - return ZSTD_decompressMultiFrame(dctx, dst, dstCapacity, src, srcSize, - NULL, 0, - ddict); -} - - -/*===================================== -* Streaming decompression -*====================================*/ - -ZSTD_DStream* ZSTD_createDStream(void) -{ - DEBUGLOG(3, "ZSTD_createDStream"); - return ZSTD_createDStream_advanced(ZSTDInternalConstants::ZSTD_defaultCMem); -} - -ZSTD_DStream* ZSTD_initStaticDStream(void *workspace, size_t workspaceSize) -{ - return ZSTD_initStaticDCtx(workspace, workspaceSize); -} - -ZSTD_DStream* ZSTD_createDStream_advanced(ZSTD_customMem customMem) -{ - return ZSTD_createDCtx_advanced(customMem); -} - -size_t ZSTD_freeDStream(ZSTD_DStream* zds) -{ - return ZSTD_freeDCtx(zds); -} - - -/* *** Initialization *** */ - -size_t ZSTD_DStreamInSize(void) { return ZSTD_BLOCKSIZE_MAX + ZSTDInternalConstants::ZSTD_blockHeaderSize; } -size_t ZSTD_DStreamOutSize(void) { return ZSTD_BLOCKSIZE_MAX; } - -size_t ZSTD_DCtx_loadDictionary_advanced(ZSTD_DCtx* dctx, - const void* dict, size_t dictSize, - ZSTD_dictLoadMethod_e dictLoadMethod, - ZSTD_dictContentType_e dictContentType) -{ - RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong, ""); - ZSTD_clearDict(dctx); - if (dict && dictSize != 0) { - dctx->ddictLocal = ZSTD_createDDict_advanced(dict, dictSize, dictLoadMethod, dictContentType, dctx->customMem); - RETURN_ERROR_IF(dctx->ddictLocal == NULL, memory_allocation, "NULL pointer!"); - dctx->ddict = dctx->ddictLocal; - dctx->dictUses = ZSTD_use_indefinitely; - } - return 0; -} - -size_t ZSTD_DCtx_loadDictionary_byReference(ZSTD_DCtx* dctx, const void* dict, size_t dictSize) -{ - return ZSTD_DCtx_loadDictionary_advanced(dctx, dict, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto); -} - -size_t ZSTD_DCtx_loadDictionary(ZSTD_DCtx* dctx, const void* dict, size_t dictSize) -{ - return ZSTD_DCtx_loadDictionary_advanced(dctx, dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto); -} - -size_t ZSTD_DCtx_refPrefix_advanced(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType) -{ - FORWARD_IF_ERROR(ZSTD_DCtx_loadDictionary_advanced(dctx, prefix, prefixSize, ZSTD_dlm_byRef, dictContentType), ""); - dctx->dictUses = ZSTD_use_once; - return 0; -} - -size_t ZSTD_DCtx_refPrefix(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize) -{ - return ZSTD_DCtx_refPrefix_advanced(dctx, prefix, prefixSize, ZSTD_dct_rawContent); -} - - -/* ZSTD_initDStream_usingDict() : - * return : expected size, aka ZSTD_startingInputLength(). - * this function cannot fail */ -size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize) -{ - DEBUGLOG(4, "ZSTD_initDStream_usingDict"); - FORWARD_IF_ERROR( ZSTD_DCtx_reset(zds, ZSTD_reset_session_only) , ""); - FORWARD_IF_ERROR( ZSTD_DCtx_loadDictionary(zds, dict, dictSize) , ""); - return ZSTD_startingInputLength(zds->format); -} - -/* note : this variant can't fail */ -size_t ZSTD_initDStream(ZSTD_DStream* zds) -{ - DEBUGLOG(4, "ZSTD_initDStream"); - return ZSTD_initDStream_usingDDict(zds, NULL); -} - -/* ZSTD_initDStream_usingDDict() : - * ddict will just be referenced, and must outlive decompression session - * this function cannot fail */ -size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* dctx, const ZSTD_DDict* ddict) -{ - FORWARD_IF_ERROR( ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only) , ""); - FORWARD_IF_ERROR( ZSTD_DCtx_refDDict(dctx, ddict) , ""); - return ZSTD_startingInputLength(dctx->format); -} - -/* ZSTD_resetDStream() : - * return : expected size, aka ZSTD_startingInputLength(). - * this function cannot fail */ -size_t ZSTD_resetDStream(ZSTD_DStream* dctx) -{ - FORWARD_IF_ERROR(ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only), ""); - return ZSTD_startingInputLength(dctx->format); -} - - -size_t ZSTD_DCtx_refDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict) -{ - RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong, ""); - ZSTD_clearDict(dctx); - if (ddict) { - dctx->ddict = ddict; - dctx->dictUses = ZSTD_use_indefinitely; - } - return 0; -} - -/* ZSTD_DCtx_setMaxWindowSize() : - * note : no direct equivalence in ZSTD_DCtx_setParameter, - * since this version sets windowSize, and the other sets windowLog */ -size_t ZSTD_DCtx_setMaxWindowSize(ZSTD_DCtx* dctx, size_t maxWindowSize) -{ - ZSTD_bounds const bounds = ZSTD_dParam_getBounds(ZSTD_d_windowLogMax); - size_t const min = (size_t)1 << bounds.lowerBound; - size_t const max = (size_t)1 << bounds.upperBound; - RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong, ""); - RETURN_ERROR_IF(maxWindowSize < min, parameter_outOfBound, ""); - RETURN_ERROR_IF(maxWindowSize > max, parameter_outOfBound, ""); - dctx->maxWindowSize = maxWindowSize; - return 0; -} - -size_t ZSTD_DCtx_setFormat(ZSTD_DCtx* dctx, ZSTD_format_e format) -{ - return ZSTD_DCtx_setParameter(dctx, ZSTD_d_format, format); -} - -ZSTD_bounds ZSTD_dParam_getBounds(ZSTD_dParameter dParam) -{ - ZSTD_bounds bounds = { 0, 0, 0 }; - switch(dParam) { - case ZSTD_d_windowLogMax: - bounds.lowerBound = ZSTD_WINDOWLOG_ABSOLUTEMIN; - bounds.upperBound = ZSTD_WINDOWLOG_MAX; - return bounds; - case ZSTD_d_format: - bounds.lowerBound = (int)ZSTD_f_zstd1; - bounds.upperBound = (int)ZSTD_f_zstd1_magicless; - ZSTD_STATIC_ASSERT(ZSTD_f_zstd1 < ZSTD_f_zstd1_magicless); - return bounds; - case ZSTD_d_stableOutBuffer: - bounds.lowerBound = (int)ZSTD_obm_buffered; - bounds.upperBound = (int)ZSTD_obm_stable; - return bounds; - default:; - } - bounds.error = ERROR(parameter_unsupported); - return bounds; -} - -/* ZSTD_dParam_withinBounds: - * @return 1 if value is within dParam bounds, - * 0 otherwise */ -static int ZSTD_dParam_withinBounds(ZSTD_dParameter dParam, int value) -{ - ZSTD_bounds const bounds = ZSTD_dParam_getBounds(dParam); - if (ZSTD_isError(bounds.error)) return 0; - if (value < bounds.lowerBound) return 0; - if (value > bounds.upperBound) return 0; - return 1; -} - -#define CHECK_DBOUNDS(p,v) { \ - RETURN_ERROR_IF(!ZSTD_dParam_withinBounds(p, v), parameter_outOfBound, ""); \ -} - -size_t ZSTD_DCtx_setParameter(ZSTD_DCtx* dctx, ZSTD_dParameter dParam, int value) -{ - RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong, ""); - switch(dParam) { - case ZSTD_d_windowLogMax: - if (value == 0) value = ZSTD_WINDOWLOG_LIMIT_DEFAULT; - CHECK_DBOUNDS(ZSTD_d_windowLogMax, value); - dctx->maxWindowSize = ((size_t)1) << value; - return 0; - case ZSTD_d_format: - CHECK_DBOUNDS(ZSTD_d_format, value); - dctx->format = (ZSTD_format_e)value; - return 0; - case ZSTD_d_stableOutBuffer: - CHECK_DBOUNDS(ZSTD_d_stableOutBuffer, value); - dctx->outBufferMode = (ZSTD_outBufferMode_e)value; - return 0; - default:; - } - RETURN_ERROR(parameter_unsupported, ""); -} - -size_t ZSTD_DCtx_reset(ZSTD_DCtx* dctx, ZSTD_ResetDirective reset) -{ - if ( (reset == ZSTD_reset_session_only) - || (reset == ZSTD_reset_session_and_parameters) ) { - dctx->streamStage = zdss_init; - dctx->noForwardProgress = 0; - } - if ( (reset == ZSTD_reset_parameters) - || (reset == ZSTD_reset_session_and_parameters) ) { - RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong, ""); - ZSTD_clearDict(dctx); - dctx->format = ZSTD_f_zstd1; - dctx->maxWindowSize = ZSTD_MAXWINDOWSIZE_DEFAULT; - } - return 0; -} - - -size_t ZSTD_sizeof_DStream(const ZSTD_DStream* dctx) -{ - return ZSTD_sizeof_DCtx(dctx); -} - -size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long long frameContentSize) -{ - size_t const blockSize = (size_t) MIN(windowSize, ZSTD_BLOCKSIZE_MAX); - unsigned long long const neededRBSize = windowSize + blockSize + (WILDCOPY_OVERLENGTH * 2); - unsigned long long const neededSize = MIN(frameContentSize, neededRBSize); - size_t const minRBSize = (size_t) neededSize; - RETURN_ERROR_IF((unsigned long long)minRBSize != neededSize, - frameParameter_windowTooLarge, ""); - return minRBSize; -} - -size_t ZSTD_estimateDStreamSize(size_t windowSize) -{ - size_t const blockSize = MIN(windowSize, ZSTD_BLOCKSIZE_MAX); - size_t const inBuffSize = blockSize; /* no block can be larger */ - size_t const outBuffSize = ZSTD_decodingBufferSize_min(windowSize, ZSTD_CONTENTSIZE_UNKNOWN); - return ZSTD_estimateDCtxSize() + inBuffSize + outBuffSize; -} - -size_t ZSTD_estimateDStreamSize_fromFrame(const void* src, size_t srcSize) -{ - U32 const windowSizeMax = 1U << ZSTD_WINDOWLOG_MAX; /* note : should be user-selectable, but requires an additional parameter (or a dctx) */ - ZSTD_frameHeader zfh; - size_t const err = ZSTD_getFrameHeader(&zfh, src, srcSize); - if (ZSTD_isError(err)) return err; - RETURN_ERROR_IF(err>0, srcSize_wrong, ""); - RETURN_ERROR_IF(zfh.windowSize > windowSizeMax, - frameParameter_windowTooLarge, ""); - return ZSTD_estimateDStreamSize((size_t)zfh.windowSize); -} - - -/* ***** Decompression ***** */ - -static int ZSTD_DCtx_isOverflow(ZSTD_DStream* zds, size_t const neededInBuffSize, size_t const neededOutBuffSize) -{ - return (zds->inBuffSize + zds->outBuffSize) >= (neededInBuffSize + neededOutBuffSize) * ZSTD_WORKSPACETOOLARGE_FACTOR; -} - -static void ZSTD_DCtx_updateOversizedDuration(ZSTD_DStream* zds, size_t const neededInBuffSize, size_t const neededOutBuffSize) -{ - if (ZSTD_DCtx_isOverflow(zds, neededInBuffSize, neededOutBuffSize)) - zds->oversizedDuration++; - else - zds->oversizedDuration = 0; -} - -static int ZSTD_DCtx_isOversizedTooLong(ZSTD_DStream* zds) -{ - return zds->oversizedDuration >= ZSTD_WORKSPACETOOLARGE_MAXDURATION; -} - -/* Checks that the output buffer hasn't changed if ZSTD_obm_stable is used. */ -static size_t ZSTD_checkOutBuffer(ZSTD_DStream const* zds, ZSTD_outBuffer const* output) -{ - ZSTD_outBuffer const expect = zds->expectedOutBuffer; - /* No requirement when ZSTD_obm_stable is not enabled. */ - if (zds->outBufferMode != ZSTD_obm_stable) - return 0; - /* Any buffer is allowed in zdss_init, this must be the same for every other call until - * the context is reset. - */ - if (zds->streamStage == zdss_init) - return 0; - /* The buffer must match our expectation exactly. */ - if (expect.dst == output->dst && expect.pos == output->pos && expect.size == output->size) - return 0; - RETURN_ERROR(dstBuffer_wrong, "ZSTD_obm_stable enabled but output differs!"); -} - -/* Calls ZSTD_decompressContinue() with the right parameters for ZSTD_decompressStream() - * and updates the stage and the output buffer state. This call is extracted so it can be - * used both when reading directly from the ZSTD_inBuffer, and in buffered input mode. - * NOTE: You must break after calling this function since the streamStage is modified. - */ -static size_t ZSTD_decompressContinueStream( - ZSTD_DStream* zds, char** op, char* oend, - void const* src, size_t srcSize) { - int const isSkipFrame = ZSTD_isSkipFrame(zds); - if (zds->outBufferMode == ZSTD_obm_buffered) { - size_t const dstSize = isSkipFrame ? 0 : zds->outBuffSize - zds->outStart; - size_t const decodedSize = ZSTD_decompressContinue(zds, - zds->outBuff + zds->outStart, dstSize, src, srcSize); - FORWARD_IF_ERROR(decodedSize, ""); - if (!decodedSize && !isSkipFrame) { - zds->streamStage = zdss_read; - } else { - zds->outEnd = zds->outStart + decodedSize; - zds->streamStage = zdss_flush; - } - } else { - /* Write directly into the output buffer */ - size_t const dstSize = isSkipFrame ? 0 : oend - *op; - size_t const decodedSize = ZSTD_decompressContinue(zds, *op, dstSize, src, srcSize); - FORWARD_IF_ERROR(decodedSize, ""); - *op += decodedSize; - /* Flushing is not needed. */ - zds->streamStage = zdss_read; - assert(*op <= oend); - assert(zds->outBufferMode == ZSTD_obm_stable); - } - return 0; -} - -size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inBuffer* input) -{ - const char* const src = (const char*)input->src; - const char* const istart = input->pos != 0 ? src + input->pos : src; - const char* const iend = input->size != 0 ? src + input->size : src; - const char* ip = istart; - char* const dst = (char*)output->dst; - char* const ostart = output->pos != 0 ? dst + output->pos : dst; - char* const oend = output->size != 0 ? dst + output->size : dst; - char* op = ostart; - U32 someMoreWork = 1; - - DEBUGLOG(5, "ZSTD_decompressStream"); - RETURN_ERROR_IF( - input->pos > input->size, - srcSize_wrong, - "forbidden. in: pos: %u vs size: %u", - (U32)input->pos, (U32)input->size); - RETURN_ERROR_IF( - output->pos > output->size, - dstSize_tooSmall, - "forbidden. out: pos: %u vs size: %u", - (U32)output->pos, (U32)output->size); - DEBUGLOG(5, "input size : %u", (U32)(input->size - input->pos)); - FORWARD_IF_ERROR(ZSTD_checkOutBuffer(zds, output), ""); - - while (someMoreWork) { - switch(zds->streamStage) - { - case zdss_init : - DEBUGLOG(5, "stage zdss_init => transparent reset "); - zds->streamStage = zdss_loadHeader; - zds->lhSize = zds->inPos = zds->outStart = zds->outEnd = 0; - zds->legacyVersion = 0; - zds->hostageByte = 0; - zds->expectedOutBuffer = *output; - /* fall-through */ - - case zdss_loadHeader : - DEBUGLOG(5, "stage zdss_loadHeader (srcSize : %u)", (U32)(iend - ip)); -#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1) - if (zds->legacyVersion) { - RETURN_ERROR_IF(zds->staticSize, memory_allocation, - "legacy support is incompatible with static dctx"); - { size_t const hint = ZSTD_decompressLegacyStream(zds->legacyContext, zds->legacyVersion, output, input); - if (hint==0) zds->streamStage = zdss_init; - return hint; - } } -#endif - { size_t const hSize = ZSTD_getFrameHeader_advanced(&zds->fParams, zds->headerBuffer, zds->lhSize, zds->format); - DEBUGLOG(5, "header size : %u", (U32)hSize); - if (ZSTD_isError(hSize)) { -#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1) - U32 const legacyVersion = ZSTD_isLegacy(istart, iend-istart); - if (legacyVersion) { - ZSTD_DDict const* const ddict = ZSTD_getDDict(zds); - const void* const dict = ddict ? ZSTD_DDict_dictContent(ddict) : NULL; - size_t const dictSize = ddict ? ZSTD_DDict_dictSize(ddict) : 0; - DEBUGLOG(5, "ZSTD_decompressStream: detected legacy version v0.%u", legacyVersion); - RETURN_ERROR_IF(zds->staticSize, memory_allocation, - "legacy support is incompatible with static dctx"); - FORWARD_IF_ERROR(ZSTD_initLegacyStream(&zds->legacyContext, - zds->previousLegacyVersion, legacyVersion, - dict, dictSize), ""); - zds->legacyVersion = zds->previousLegacyVersion = legacyVersion; - { size_t const hint = ZSTD_decompressLegacyStream(zds->legacyContext, legacyVersion, output, input); - if (hint==0) zds->streamStage = zdss_init; /* or stay in stage zdss_loadHeader */ - return hint; - } } -#endif - return hSize; /* error */ - } - if (hSize != 0) { /* need more input */ - size_t const toLoad = hSize - zds->lhSize; /* if hSize!=0, hSize > zds->lhSize */ - size_t const remainingInput = (size_t)(iend-ip); - assert(iend >= ip); - if (toLoad > remainingInput) { /* not enough input to load full header */ - if (remainingInput > 0) { - memcpy(zds->headerBuffer + zds->lhSize, ip, remainingInput); - zds->lhSize += remainingInput; - } - input->pos = input->size; - return (MAX((size_t)ZSTD_FRAMEHEADERSIZE_MIN(zds->format), hSize) - zds->lhSize) + ZSTDInternalConstants::ZSTD_blockHeaderSize; /* remaining header bytes + next block header */ - } - assert(ip != NULL); - memcpy(zds->headerBuffer + zds->lhSize, ip, toLoad); zds->lhSize = hSize; ip += toLoad; - break; - } } - - /* check for single-pass mode opportunity */ - if (zds->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN - && zds->fParams.frameType != ZSTD_skippableFrame - && (U64)(size_t)(oend-op) >= zds->fParams.frameContentSize) { - size_t const cSize = ZSTD_findFrameCompressedSize(istart, iend-istart); - if (cSize <= (size_t)(iend-istart)) { - /* shortcut : using single-pass mode */ - size_t const decompressedSize = ZSTD_decompress_usingDDict(zds, op, oend-op, istart, cSize, ZSTD_getDDict(zds)); - if (ZSTD_isError(decompressedSize)) return decompressedSize; - DEBUGLOG(4, "shortcut to single-pass ZSTD_decompress_usingDDict()") - ip = istart + cSize; - op += decompressedSize; - zds->expected = 0; - zds->streamStage = zdss_init; - someMoreWork = 0; - break; - } } - - /* Check output buffer is large enough for ZSTD_odm_stable. */ - if (zds->outBufferMode == ZSTD_obm_stable - && zds->fParams.frameType != ZSTD_skippableFrame - && zds->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN - && (U64)(size_t)(oend-op) < zds->fParams.frameContentSize) { - RETURN_ERROR(dstSize_tooSmall, "ZSTD_obm_stable passed but ZSTD_outBuffer is too small"); - } - - /* Consume header (see ZSTDds_decodeFrameHeader) */ - DEBUGLOG(4, "Consume header"); - FORWARD_IF_ERROR(ZSTD_decompressBegin_usingDDict(zds, ZSTD_getDDict(zds)), ""); - - if ((MEM_readLE32(zds->headerBuffer) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { /* skippable frame */ - zds->expected = MEM_readLE32(zds->headerBuffer + ZSTD_FRAMEIDSIZE); - zds->stage = ZSTDds_skipFrame; - } else { - FORWARD_IF_ERROR(ZSTD_decodeFrameHeader(zds, zds->headerBuffer, zds->lhSize), ""); - zds->expected = ZSTDInternalConstants::ZSTD_blockHeaderSize; - zds->stage = ZSTDds_decodeBlockHeader; - } - - /* control buffer memory usage */ - DEBUGLOG(4, "Control max memory usage (%u KB <= max %u KB)", - (U32)(zds->fParams.windowSize >>10), - (U32)(zds->maxWindowSize >> 10) ); - zds->fParams.windowSize = MAX(zds->fParams.windowSize, 1U << ZSTD_WINDOWLOG_ABSOLUTEMIN); - RETURN_ERROR_IF(zds->fParams.windowSize > zds->maxWindowSize, - frameParameter_windowTooLarge, ""); - - /* Adapt buffer sizes to frame header instructions */ - { size_t const neededInBuffSize = MAX(zds->fParams.blockSizeMax, 4 /* frame checksum */); - size_t const neededOutBuffSize = zds->outBufferMode == ZSTD_obm_buffered - ? ZSTD_decodingBufferSize_min(zds->fParams.windowSize, zds->fParams.frameContentSize) - : 0; - - ZSTD_DCtx_updateOversizedDuration(zds, neededInBuffSize, neededOutBuffSize); - - { int const tooSmall = (zds->inBuffSize < neededInBuffSize) || (zds->outBuffSize < neededOutBuffSize); - int const tooLarge = ZSTD_DCtx_isOversizedTooLong(zds); - - if (tooSmall || tooLarge) { - size_t const bufferSize = neededInBuffSize + neededOutBuffSize; - DEBUGLOG(4, "inBuff : from %u to %u", - (U32)zds->inBuffSize, (U32)neededInBuffSize); - DEBUGLOG(4, "outBuff : from %u to %u", - (U32)zds->outBuffSize, (U32)neededOutBuffSize); - if (zds->staticSize) { /* static DCtx */ - DEBUGLOG(4, "staticSize : %u", (U32)zds->staticSize); - assert(zds->staticSize >= sizeof(ZSTD_DCtx)); /* controlled at init */ - RETURN_ERROR_IF( - bufferSize > zds->staticSize - sizeof(ZSTD_DCtx), - memory_allocation, ""); - } else { - ZSTD_free(zds->inBuff, zds->customMem); - zds->inBuffSize = 0; - zds->outBuffSize = 0; - zds->inBuff = (char*)ZSTD_malloc(bufferSize, zds->customMem); - RETURN_ERROR_IF(zds->inBuff == NULL, memory_allocation, ""); - } - zds->inBuffSize = neededInBuffSize; - zds->outBuff = zds->inBuff + zds->inBuffSize; - zds->outBuffSize = neededOutBuffSize; - } } } - zds->streamStage = zdss_read; - /* fall-through */ - - case zdss_read: - DEBUGLOG(5, "stage zdss_read"); - { size_t const neededInSize = ZSTD_nextSrcSizeToDecompressWithInputSize(zds, iend - ip); - DEBUGLOG(5, "neededInSize = %u", (U32)neededInSize); - if (neededInSize==0) { /* end of frame */ - zds->streamStage = zdss_init; - someMoreWork = 0; - break; - } - if ((size_t)(iend-ip) >= neededInSize) { /* decode directly from src */ - FORWARD_IF_ERROR(ZSTD_decompressContinueStream(zds, &op, oend, ip, neededInSize), ""); - ip += neededInSize; - /* Function modifies the stage so we must break */ - break; - } } - if (ip==iend) { someMoreWork = 0; break; } /* no more input */ - zds->streamStage = zdss_load; - /* fall-through */ - - case zdss_load: - { size_t const neededInSize = ZSTD_nextSrcSizeToDecompress(zds); - size_t const toLoad = neededInSize - zds->inPos; - int const isSkipFrame = ZSTD_isSkipFrame(zds); - size_t loadedSize; - /* At this point we shouldn't be decompressing a block that we can stream. */ - assert(neededInSize == ZSTD_nextSrcSizeToDecompressWithInputSize(zds, iend - ip)); - if (isSkipFrame) { - loadedSize = MIN(toLoad, (size_t)(iend-ip)); - } else { - RETURN_ERROR_IF(toLoad > zds->inBuffSize - zds->inPos, - corruption_detected, - "should never happen"); - loadedSize = ZSTD_limitCopy(zds->inBuff + zds->inPos, toLoad, ip, iend-ip); - } - ip += loadedSize; - zds->inPos += loadedSize; - if (loadedSize < toLoad) { someMoreWork = 0; break; } /* not enough input, wait for more */ - - /* decode loaded input */ - zds->inPos = 0; /* input is consumed */ - FORWARD_IF_ERROR(ZSTD_decompressContinueStream(zds, &op, oend, zds->inBuff, neededInSize), ""); - /* Function modifies the stage so we must break */ - break; - } - case zdss_flush: - { size_t const toFlushSize = zds->outEnd - zds->outStart; - size_t const flushedSize = ZSTD_limitCopy(op, oend-op, zds->outBuff + zds->outStart, toFlushSize); - op += flushedSize; - zds->outStart += flushedSize; - if (flushedSize == toFlushSize) { /* flush completed */ - zds->streamStage = zdss_read; - if ( (zds->outBuffSize < zds->fParams.frameContentSize) - && (zds->outStart + zds->fParams.blockSizeMax > zds->outBuffSize) ) { - DEBUGLOG(5, "restart filling outBuff from beginning (left:%i, needed:%u)", - (int)(zds->outBuffSize - zds->outStart), - (U32)zds->fParams.blockSizeMax); - zds->outStart = zds->outEnd = 0; - } - break; - } } - /* cannot complete flush */ - someMoreWork = 0; - break; - - default: - assert(0); /* impossible */ - RETURN_ERROR(GENERIC, "impossible to reach"); /* some compiler require default to do something */ - } } - - /* result */ - input->pos = (size_t)(ip - (const char*)(input->src)); - output->pos = (size_t)(op - (char*)(output->dst)); - - /* Update the expected output buffer for ZSTD_obm_stable. */ - zds->expectedOutBuffer = *output; - - if ((ip==istart) && (op==ostart)) { /* no forward progress */ - zds->noForwardProgress ++; - if (zds->noForwardProgress >= ZSTD_NO_FORWARD_PROGRESS_MAX) { - RETURN_ERROR_IF(op==oend, dstSize_tooSmall, ""); - RETURN_ERROR_IF(ip==iend, srcSize_wrong, ""); - assert(0); - } - } else { - zds->noForwardProgress = 0; - } - { size_t nextSrcSizeHint = ZSTD_nextSrcSizeToDecompress(zds); - if (!nextSrcSizeHint) { /* frame fully decoded */ - if (zds->outEnd == zds->outStart) { /* output fully flushed */ - if (zds->hostageByte) { - if (input->pos >= input->size) { - /* can't release hostage (not present) */ - zds->streamStage = zdss_read; - return 1; - } - input->pos++; /* release hostage */ - } /* zds->hostageByte */ - return 0; - } /* zds->outEnd == zds->outStart */ - if (!zds->hostageByte) { /* output not fully flushed; keep last byte as hostage; will be released when all output is flushed */ - input->pos--; /* note : pos > 0, otherwise, impossible to finish reading last block */ - zds->hostageByte=1; - } - return 1; - } /* nextSrcSizeHint==0 */ - nextSrcSizeHint += ZSTDInternalConstants::ZSTD_blockHeaderSize * (ZSTD_nextInputType(zds) == ZSTDnit_block); /* preload header of next block */ - assert(zds->inPos <= nextSrcSizeHint); - nextSrcSizeHint -= zds->inPos; /* part already loaded*/ - return nextSrcSizeHint; - } -} - -size_t ZSTD_decompressStream_simpleArgs ( - ZSTD_DCtx* dctx, - void* dst, size_t dstCapacity, size_t* dstPos, - const void* src, size_t srcSize, size_t* srcPos) -{ - ZSTD_outBuffer output = { dst, dstCapacity, *dstPos }; - ZSTD_inBuffer input = { src, srcSize, *srcPos }; - /* ZSTD_compress_generic() will check validity of dstPos and srcPos */ - size_t const cErr = ZSTD_decompressStream(dctx, &output, &input); - *dstPos = output.pos; - *srcPos = input.pos; - return cErr; -} - -} - - -// LICENSE_CHANGE_END - - -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #4 -// See the end of this file for a list - -/* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. - */ - -/* zstd_decompress_block : - * this module takes care of decompressing _compressed_ block */ - -/*-******************************************************* -* Dependencies -*********************************************************/ -#include /* memcpy, memmove, memset */ - /* prefetch */ - /* low level memory routines */ - - - - - - /* ZSTD_DCtx */ - /* ZSTD_DDictDictContent */ - -namespace duckdb_zstd { -/*_******************************************************* -* Macros -**********************************************************/ - -/* These two optional macros force the use one way or another of the two - * ZSTD_decompressSequences implementations. You can't force in both directions - * at the same time. - */ -#if defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \ - defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG) -#error "Cannot force the use of the short and the long ZSTD_decompressSequences variants!" -#endif - - -/*_******************************************************* -* Memory operations -**********************************************************/ -static void ZSTD_copy4(void* dst, const void* src) { memcpy(dst, src, 4); } - - -/*-************************************************************* - * Block decoding - ***************************************************************/ - -/*! ZSTD_getcBlockSize() : - * Provides the size of compressed block from block header `src` */ -size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, - blockProperties_t* bpPtr) -{ - RETURN_ERROR_IF(srcSize < ZSTDInternalConstants::ZSTD_blockHeaderSize, srcSize_wrong, ""); - - { U32 const cBlockHeader = MEM_readLE24(src); - U32 const cSize = cBlockHeader >> 3; - bpPtr->lastBlock = cBlockHeader & 1; - bpPtr->blockType = (blockType_e)((cBlockHeader >> 1) & 3); - bpPtr->origSize = cSize; /* only useful for RLE */ - if (bpPtr->blockType == bt_rle) return 1; - RETURN_ERROR_IF(bpPtr->blockType == bt_reserved, corruption_detected, ""); - return cSize; - } -} - - -/* Hidden declaration for fullbench */ -size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, - const void* src, size_t srcSize); -/*! ZSTD_decodeLiteralsBlock() : - * @return : nb of bytes read from src (< srcSize ) - * note : symbol not declared but exposed for fullbench */ -size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, - const void* src, size_t srcSize) /* note : srcSize < BLOCKSIZE */ -{ - DEBUGLOG(5, "ZSTD_decodeLiteralsBlock"); - RETURN_ERROR_IF(srcSize < MIN_CBLOCK_SIZE, corruption_detected, ""); - - { const BYTE* const istart = (const BYTE*) src; - symbolEncodingType_e const litEncType = (symbolEncodingType_e)(istart[0] & 3); - - switch(litEncType) - { - case set_repeat: - DEBUGLOG(5, "set_repeat flag : re-using stats from previous compressed literals block"); - RETURN_ERROR_IF(dctx->litEntropy==0, dictionary_corrupted, ""); - /* fall-through */ - - case set_compressed: - RETURN_ERROR_IF(srcSize < 5, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need up to 5 for case 3"); - { size_t lhSize, litSize, litCSize; - U32 singleStream=0; - U32 const lhlCode = (istart[0] >> 2) & 3; - U32 const lhc = MEM_readLE32(istart); - size_t hufSuccess; - switch(lhlCode) - { - case 0: case 1: default: /* note : default is impossible, since lhlCode into [0..3] */ - /* 2 - 2 - 10 - 10 */ - singleStream = !lhlCode; - lhSize = 3; - litSize = (lhc >> 4) & 0x3FF; - litCSize = (lhc >> 14) & 0x3FF; - break; - case 2: - /* 2 - 2 - 14 - 14 */ - lhSize = 4; - litSize = (lhc >> 4) & 0x3FFF; - litCSize = lhc >> 18; - break; - case 3: - /* 2 - 2 - 18 - 18 */ - lhSize = 5; - litSize = (lhc >> 4) & 0x3FFFF; - litCSize = (lhc >> 22) + ((size_t)istart[4] << 10); - break; - } - RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, ""); - RETURN_ERROR_IF(litCSize + lhSize > srcSize, corruption_detected, ""); - - /* prefetch huffman table if cold */ - if (dctx->ddictIsCold && (litSize > 768 /* heuristic */)) { - PREFETCH_AREA(dctx->HUFptr, sizeof(dctx->entropy.hufTable)); - } - - if (litEncType==set_repeat) { - if (singleStream) { - hufSuccess = HUF_decompress1X_usingDTable_bmi2( - dctx->litBuffer, litSize, istart+lhSize, litCSize, - dctx->HUFptr, dctx->bmi2); - } else { - hufSuccess = HUF_decompress4X_usingDTable_bmi2( - dctx->litBuffer, litSize, istart+lhSize, litCSize, - dctx->HUFptr, dctx->bmi2); - } - } else { - if (singleStream) { -#if defined(HUF_FORCE_DECOMPRESS_X2) - hufSuccess = HUF_decompress1X_DCtx_wksp( - dctx->entropy.hufTable, dctx->litBuffer, litSize, - istart+lhSize, litCSize, dctx->workspace, - sizeof(dctx->workspace)); -#else - hufSuccess = HUF_decompress1X1_DCtx_wksp_bmi2( - dctx->entropy.hufTable, dctx->litBuffer, litSize, - istart+lhSize, litCSize, dctx->workspace, - sizeof(dctx->workspace), dctx->bmi2); -#endif - } else { - hufSuccess = HUF_decompress4X_hufOnly_wksp_bmi2( - dctx->entropy.hufTable, dctx->litBuffer, litSize, - istart+lhSize, litCSize, dctx->workspace, - sizeof(dctx->workspace), dctx->bmi2); - } - } - - RETURN_ERROR_IF(HUF_isError(hufSuccess), corruption_detected, ""); - - dctx->litPtr = dctx->litBuffer; - dctx->litSize = litSize; - dctx->litEntropy = 1; - if (litEncType==set_compressed) dctx->HUFptr = dctx->entropy.hufTable; - memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH); - return litCSize + lhSize; - } - - case set_basic: - { size_t litSize, lhSize; - U32 const lhlCode = ((istart[0]) >> 2) & 3; - switch(lhlCode) - { - case 0: case 2: default: /* note : default is impossible, since lhlCode into [0..3] */ - lhSize = 1; - litSize = istart[0] >> 3; - break; - case 1: - lhSize = 2; - litSize = MEM_readLE16(istart) >> 4; - break; - case 3: - lhSize = 3; - litSize = MEM_readLE24(istart) >> 4; - break; - } - - if (lhSize+litSize+WILDCOPY_OVERLENGTH > srcSize) { /* risk reading beyond src buffer with wildcopy */ - RETURN_ERROR_IF(litSize+lhSize > srcSize, corruption_detected, ""); - memcpy(dctx->litBuffer, istart+lhSize, litSize); - dctx->litPtr = dctx->litBuffer; - dctx->litSize = litSize; - memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH); - return lhSize+litSize; - } - /* direct reference into compressed stream */ - dctx->litPtr = istart+lhSize; - dctx->litSize = litSize; - return lhSize+litSize; - } - - case set_rle: - { U32 const lhlCode = ((istart[0]) >> 2) & 3; - size_t litSize, lhSize; - switch(lhlCode) - { - case 0: case 2: default: /* note : default is impossible, since lhlCode into [0..3] */ - lhSize = 1; - litSize = istart[0] >> 3; - break; - case 1: - lhSize = 2; - litSize = MEM_readLE16(istart) >> 4; - break; - case 3: - lhSize = 3; - litSize = MEM_readLE24(istart) >> 4; - RETURN_ERROR_IF(srcSize<4, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need lhSize+1 = 4"); - break; - } - RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, ""); - memset(dctx->litBuffer, istart[lhSize], litSize + WILDCOPY_OVERLENGTH); - dctx->litPtr = dctx->litBuffer; - dctx->litSize = litSize; - return lhSize+1; - } - default: - RETURN_ERROR(corruption_detected, "impossible"); - } - } -} - -/* Default FSE distribution tables. - * These are pre-calculated FSE decoding tables using default distributions as defined in specification : - * https://github.com/facebook/zstd/blob/master/doc/zstd_compression_format.md#default-distributions - * They were generated programmatically with following method : - * - start from default distributions, present in /lib/common/zstd_internal.h - * - generate tables normally, using ZSTD_buildFSETable() - * - printout the content of tables - * - pretify output, report below, test with fuzzer to ensure it's correct */ - -/* Default FSE distribution table for Literal Lengths */ -static const ZSTD_seqSymbol LL_defaultDTable[(1<tableLog = 0; - DTableH->fastMode = 0; - - cell->nbBits = 0; - cell->nextState = 0; - assert(nbAddBits < 255); - cell->nbAdditionalBits = (BYTE)nbAddBits; - cell->baseValue = baseValue; -} - - -/* ZSTD_buildFSETable() : - * generate FSE decoding table for one symbol (ll, ml or off) - * cannot fail if input is valid => - * all inputs are presumed validated at this stage */ -void -ZSTD_buildFSETable(ZSTD_seqSymbol* dt, - const short* normalizedCounter, unsigned maxSymbolValue, - const U32* baseValue, const U32* nbAdditionalBits, - unsigned tableLog) -{ - ZSTD_seqSymbol* const tableDecode = dt+1; - U16 symbolNext[MaxSeq+1]; - - U32 const maxSV1 = maxSymbolValue + 1; - U32 const tableSize = 1 << tableLog; - U32 highThreshold = tableSize-1; - - /* Sanity Checks */ - assert(maxSymbolValue <= MaxSeq); - assert(tableLog <= MaxFSELog); - - /* Init, lay down lowprob symbols */ - { ZSTD_seqSymbol_header DTableH; - DTableH.tableLog = tableLog; - DTableH.fastMode = 1; - { S16 const largeLimit= (S16)(1 << (tableLog-1)); - U32 s; - for (s=0; s= largeLimit) DTableH.fastMode=0; - assert(normalizedCounter[s]>=0); - symbolNext[s] = (U16)normalizedCounter[s]; - } } } - memcpy(dt, &DTableH, sizeof(DTableH)); - } - - /* Spread symbols */ - { U32 const tableMask = tableSize-1; - U32 const step = FSE_TABLESTEP(tableSize); - U32 s, position = 0; - for (s=0; s highThreshold) position = (position + step) & tableMask; /* lowprob area */ - } } - assert(position == 0); /* position must reach all cells once, otherwise normalizedCounter is incorrect */ - } - - /* Build Decoding table */ - { U32 u; - for (u=0; u max, corruption_detected, ""); - { U32 const symbol = *(const BYTE*)src; - U32 const baseline = baseValue[symbol]; - U32 const nbBits = nbAdditionalBits[symbol]; - ZSTD_buildSeqTable_rle(DTableSpace, baseline, nbBits); - } - *DTablePtr = DTableSpace; - return 1; - case set_basic : - *DTablePtr = defaultTable; - return 0; - case set_repeat: - RETURN_ERROR_IF(!flagRepeatTable, corruption_detected, ""); - /* prefetch FSE table if used */ - if (ddictIsCold && (nbSeq > 24 /* heuristic */)) { - const void* const pStart = *DTablePtr; - size_t const pSize = sizeof(ZSTD_seqSymbol) * (SEQSYMBOL_TABLE_SIZE(maxLog)); - PREFETCH_AREA(pStart, pSize); - } - return 0; - case set_compressed : - { unsigned tableLog; - S16 norm[MaxSeq+1]; - size_t const headerSize = FSE_readNCount(norm, &max, &tableLog, src, srcSize); - RETURN_ERROR_IF(FSE_isError(headerSize), corruption_detected, ""); - RETURN_ERROR_IF(tableLog > maxLog, corruption_detected, ""); - ZSTD_buildFSETable(DTableSpace, norm, max, baseValue, nbAdditionalBits, tableLog); - *DTablePtr = DTableSpace; - return headerSize; - } - default : - assert(0); - RETURN_ERROR(GENERIC, "impossible"); - } -} - -size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr, - const void* src, size_t srcSize) -{ - const BYTE* const istart = (const BYTE* const)src; - const BYTE* const iend = istart + srcSize; - const BYTE* ip = istart; - int nbSeq; - DEBUGLOG(5, "ZSTD_decodeSeqHeaders"); - - /* check */ - RETURN_ERROR_IF(srcSize < MIN_SEQUENCES_SIZE, srcSize_wrong, ""); - - /* SeqHead */ - nbSeq = *ip++; - if (!nbSeq) { - *nbSeqPtr=0; - RETURN_ERROR_IF(srcSize != 1, srcSize_wrong, ""); - return 1; - } - if (nbSeq > 0x7F) { - if (nbSeq == 0xFF) { - RETURN_ERROR_IF(ip+2 > iend, srcSize_wrong, ""); - nbSeq = MEM_readLE16(ip) + LONGNBSEQ, ip+=2; - } else { - RETURN_ERROR_IF(ip >= iend, srcSize_wrong, ""); - nbSeq = ((nbSeq-0x80)<<8) + *ip++; - } - } - *nbSeqPtr = nbSeq; - - /* FSE table descriptors */ - RETURN_ERROR_IF(ip+1 > iend, srcSize_wrong, ""); /* minimum possible size: 1 byte for symbol encoding types */ - { symbolEncodingType_e const LLtype = (symbolEncodingType_e)(*ip >> 6); - symbolEncodingType_e const OFtype = (symbolEncodingType_e)((*ip >> 4) & 3); - symbolEncodingType_e const MLtype = (symbolEncodingType_e)((*ip >> 2) & 3); - ip++; - - /* Build DTables */ - { size_t const llhSize = ZSTD_buildSeqTable(dctx->entropy.LLTable, &dctx->LLTptr, - LLtype, MaxLL, LLFSELog, - ip, iend-ip, - ZSTDConstants::LL_base, ZSTDInternalConstants::LL_bits, - LL_defaultDTable, dctx->fseEntropy, - dctx->ddictIsCold, nbSeq); - RETURN_ERROR_IF(ZSTD_isError(llhSize), corruption_detected, "ZSTD_buildSeqTable failed"); - ip += llhSize; - } - - { size_t const ofhSize = ZSTD_buildSeqTable(dctx->entropy.OFTable, &dctx->OFTptr, - OFtype, MaxOff, OffFSELog, - ip, iend-ip, - ZSTDConstants::OF_base, ZSTDConstants::OF_bits, - OF_defaultDTable, dctx->fseEntropy, - dctx->ddictIsCold, nbSeq); - RETURN_ERROR_IF(ZSTD_isError(ofhSize), corruption_detected, "ZSTD_buildSeqTable failed"); - ip += ofhSize; - } - - { size_t const mlhSize = ZSTD_buildSeqTable(dctx->entropy.MLTable, &dctx->MLTptr, - MLtype, MaxML, MLFSELog, - ip, iend-ip, - ZSTDConstants::ML_base, ZSTDInternalConstants::ML_bits, - ML_defaultDTable, dctx->fseEntropy, - dctx->ddictIsCold, nbSeq); - RETURN_ERROR_IF(ZSTD_isError(mlhSize), corruption_detected, "ZSTD_buildSeqTable failed"); - ip += mlhSize; - } - } - - return ip-istart; -} - - -typedef struct { - size_t litLength; - size_t matchLength; - size_t offset; - const BYTE* match; -} seq_t; - -typedef struct { - size_t state; - const ZSTD_seqSymbol* table; -} ZSTD_fseState; - -typedef struct { - BIT_DStream_t DStream; - ZSTD_fseState stateLL; - ZSTD_fseState stateOffb; - ZSTD_fseState stateML; - size_t prevOffset[ZSTD_REP_NUM]; - const BYTE* prefixStart; - const BYTE* dictEnd; - size_t pos; -} seqState_t; - -/*! ZSTD_overlapCopy8() : - * Copies 8 bytes from ip to op and updates op and ip where ip <= op. - * If the offset is < 8 then the offset is spread to at least 8 bytes. - * - * Precondition: *ip <= *op - * Postcondition: *op - *op >= 8 - */ -HINT_INLINE void ZSTD_overlapCopy8(BYTE** op, BYTE const** ip, size_t offset) { - assert(*ip <= *op); - if (offset < 8) { - /* close range match, overlap */ - static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 }; /* added */ - static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 }; /* subtracted */ - int const sub2 = dec64table[offset]; - (*op)[0] = (*ip)[0]; - (*op)[1] = (*ip)[1]; - (*op)[2] = (*ip)[2]; - (*op)[3] = (*ip)[3]; - *ip += dec32table[offset]; - ZSTD_copy4(*op+4, *ip); - *ip -= sub2; - } else { - ZSTD_copy8(*op, *ip); - } - *ip += 8; - *op += 8; - assert(*op - *ip >= 8); -} - -/*! ZSTD_safecopy() : - * Specialized version of memcpy() that is allowed to READ up to WILDCOPY_OVERLENGTH past the input buffer - * and write up to 16 bytes past oend_w (op >= oend_w is allowed). - * This function is only called in the uncommon case where the sequence is near the end of the block. It - * should be fast for a single long sequence, but can be slow for several short sequences. - * - * @param ovtype controls the overlap detection - * - ZSTD_no_overlap: The source and destination are guaranteed to be at least WILDCOPY_VECLEN bytes apart. - * - ZSTD_overlap_src_before_dst: The src and dst may overlap and may be any distance apart. - * The src buffer must be before the dst buffer. - */ -static void ZSTD_safecopy(BYTE* op, BYTE* const oend_w, BYTE const* ip, ptrdiff_t length, ZSTD_overlap_e ovtype) { - ptrdiff_t const diff = op - ip; - BYTE* const oend = op + length; - - assert((ovtype == ZSTD_no_overlap && (diff <= -8 || diff >= 8 || op >= oend_w)) || - (ovtype == ZSTD_overlap_src_before_dst && diff >= 0)); - - if (length < 8) { - /* Handle short lengths. */ - while (op < oend) *op++ = *ip++; - return; - } - if (ovtype == ZSTD_overlap_src_before_dst) { - /* Copy 8 bytes and ensure the offset >= 8 when there can be overlap. */ - assert(length >= 8); - ZSTD_overlapCopy8(&op, &ip, diff); - assert(op - ip >= 8); - assert(op <= oend); - } - - if (oend <= oend_w) { - /* No risk of overwrite. */ - ZSTD_wildcopy(op, ip, length, ovtype); - return; - } - if (op <= oend_w) { - /* Wildcopy until we get close to the end. */ - assert(oend > oend_w); - ZSTD_wildcopy(op, ip, oend_w - op, ovtype); - ip += oend_w - op; - op = oend_w; - } - /* Handle the leftovers. */ - while (op < oend) *op++ = *ip++; -} - -/* ZSTD_execSequenceEnd(): - * This version handles cases that are near the end of the output buffer. It requires - * more careful checks to make sure there is no overflow. By separating out these hard - * and unlikely cases, we can speed up the common cases. - * - * NOTE: This function needs to be fast for a single long sequence, but doesn't need - * to be optimized for many small sequences, since those fall into ZSTD_execSequence(). - */ -FORCE_NOINLINE -size_t ZSTD_execSequenceEnd(BYTE* op, - BYTE* const oend, seq_t sequence, - const BYTE** litPtr, const BYTE* const litLimit, - const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd) -{ - BYTE* const oLitEnd = op + sequence.litLength; - size_t const sequenceLength = sequence.litLength + sequence.matchLength; - const BYTE* const iLitEnd = *litPtr + sequence.litLength; - const BYTE* match = oLitEnd - sequence.offset; - BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH; - - /* bounds checks : careful of address space overflow in 32-bit mode */ - RETURN_ERROR_IF(sequenceLength > (size_t)(oend - op), dstSize_tooSmall, "last match must fit within dstBuffer"); - RETURN_ERROR_IF(sequence.litLength > (size_t)(litLimit - *litPtr), corruption_detected, "try to read beyond literal buffer"); - assert(op < op + sequenceLength); - assert(oLitEnd < op + sequenceLength); - - /* copy literals */ - ZSTD_safecopy(op, oend_w, *litPtr, sequence.litLength, ZSTD_no_overlap); - op = oLitEnd; - *litPtr = iLitEnd; - - /* copy Match */ - if (sequence.offset > (size_t)(oLitEnd - prefixStart)) { - /* offset beyond prefix */ - RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected, ""); - match = dictEnd - (prefixStart-match); - if (match + sequence.matchLength <= dictEnd) { - memmove(oLitEnd, match, sequence.matchLength); - return sequenceLength; - } - /* span extDict & currentPrefixSegment */ - { size_t const length1 = dictEnd - match; - memmove(oLitEnd, match, length1); - op = oLitEnd + length1; - sequence.matchLength -= length1; - match = prefixStart; - } } - ZSTD_safecopy(op, oend_w, match, sequence.matchLength, ZSTD_overlap_src_before_dst); - return sequenceLength; -} - -HINT_INLINE -size_t ZSTD_execSequence(BYTE* op, - BYTE* const oend, seq_t sequence, - const BYTE** litPtr, const BYTE* const litLimit, - const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd) -{ - BYTE* const oLitEnd = op + sequence.litLength; - size_t const sequenceLength = sequence.litLength + sequence.matchLength; - BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */ - BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH; /* risk : address space underflow on oend=NULL */ - const BYTE* const iLitEnd = *litPtr + sequence.litLength; - const BYTE* match = oLitEnd - sequence.offset; - - assert(op != NULL /* Precondition */); - assert(oend_w < oend /* No underflow */); - /* Handle edge cases in a slow path: - * - Read beyond end of literals - * - Match end is within WILDCOPY_OVERLIMIT of oend - * - 32-bit mode and the match length overflows - */ - if (UNLIKELY( - iLitEnd > litLimit || - oMatchEnd > oend_w || - (MEM_32bits() && (size_t)(oend - op) < sequenceLength + WILDCOPY_OVERLENGTH))) - return ZSTD_execSequenceEnd(op, oend, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd); - - /* Assumptions (everything else goes into ZSTD_execSequenceEnd()) */ - assert(op <= oLitEnd /* No overflow */); - assert(oLitEnd < oMatchEnd /* Non-zero match & no overflow */); - assert(oMatchEnd <= oend /* No underflow */); - assert(iLitEnd <= litLimit /* Literal length is in bounds */); - assert(oLitEnd <= oend_w /* Can wildcopy literals */); - assert(oMatchEnd <= oend_w /* Can wildcopy matches */); - - /* Copy Literals: - * Split out litLength <= 16 since it is nearly always true. +1.6% on gcc-9. - * We likely don't need the full 32-byte wildcopy. - */ - assert(WILDCOPY_OVERLENGTH >= 16); - ZSTD_copy16(op, (*litPtr)); - if (UNLIKELY(sequence.litLength > 16)) { - ZSTD_wildcopy(op+16, (*litPtr)+16, sequence.litLength-16, ZSTD_no_overlap); - } - op = oLitEnd; - *litPtr = iLitEnd; /* update for next sequence */ - - /* Copy Match */ - if (sequence.offset > (size_t)(oLitEnd - prefixStart)) { - /* offset beyond prefix -> go into extDict */ - RETURN_ERROR_IF(UNLIKELY(sequence.offset > (size_t)(oLitEnd - virtualStart)), corruption_detected, ""); - match = dictEnd + (match - prefixStart); - if (match + sequence.matchLength <= dictEnd) { - memmove(oLitEnd, match, sequence.matchLength); - return sequenceLength; - } - /* span extDict & currentPrefixSegment */ - { size_t const length1 = dictEnd - match; - memmove(oLitEnd, match, length1); - op = oLitEnd + length1; - sequence.matchLength -= length1; - match = prefixStart; - } } - /* Match within prefix of 1 or more bytes */ - assert(op <= oMatchEnd); - assert(oMatchEnd <= oend_w); - assert(match >= prefixStart); - assert(sequence.matchLength >= 1); - - /* Nearly all offsets are >= WILDCOPY_VECLEN bytes, which means we can use wildcopy - * without overlap checking. - */ - if (LIKELY(sequence.offset >= WILDCOPY_VECLEN)) { - /* We bet on a full wildcopy for matches, since we expect matches to be - * longer than literals (in general). In silesia, ~10% of matches are longer - * than 16 bytes. - */ - ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength, ZSTD_no_overlap); - return sequenceLength; - } - assert(sequence.offset < WILDCOPY_VECLEN); - - /* Copy 8 bytes and spread the offset to be >= 8. */ - ZSTD_overlapCopy8(&op, &match, sequence.offset); - - /* If the match length is > 8 bytes, then continue with the wildcopy. */ - if (sequence.matchLength > 8) { - assert(op < oMatchEnd); - ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8, ZSTD_overlap_src_before_dst); - } - return sequenceLength; -} - -static void -ZSTD_initFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, const ZSTD_seqSymbol* dt) -{ - const void* ptr = dt; - const ZSTD_seqSymbol_header* const DTableH = (const ZSTD_seqSymbol_header*)ptr; - DStatePtr->state = BIT_readBits(bitD, DTableH->tableLog); - DEBUGLOG(6, "ZSTD_initFseState : val=%u using %u bits", - (U32)DStatePtr->state, DTableH->tableLog); - BIT_reloadDStream(bitD); - DStatePtr->table = dt + 1; -} - -FORCE_INLINE_TEMPLATE void -ZSTD_updateFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD) -{ - ZSTD_seqSymbol const DInfo = DStatePtr->table[DStatePtr->state]; - U32 const nbBits = DInfo.nbBits; - size_t const lowBits = BIT_readBits(bitD, nbBits); - DStatePtr->state = DInfo.nextState + lowBits; -} - -FORCE_INLINE_TEMPLATE void -ZSTD_updateFseStateWithDInfo(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, ZSTD_seqSymbol const DInfo) -{ - U32 const nbBits = DInfo.nbBits; - size_t const lowBits = BIT_readBits(bitD, nbBits); - DStatePtr->state = DInfo.nextState + lowBits; -} - -/* We need to add at most (ZSTD_WINDOWLOG_MAX_32 - 1) bits to read the maximum - * offset bits. But we can only read at most (STREAM_ACCUMULATOR_MIN_32 - 1) - * bits before reloading. This value is the maximum number of bytes we read - * after reloading when we are decoding long offsets. - */ -#define LONG_OFFSETS_MAX_EXTRA_BITS_32 \ - (ZSTD_WINDOWLOG_MAX_32 > STREAM_ACCUMULATOR_MIN_32 \ - ? ZSTD_WINDOWLOG_MAX_32 - STREAM_ACCUMULATOR_MIN_32 \ - : 0) - -typedef enum { ZSTD_lo_isRegularOffset, ZSTD_lo_isLongOffset=1 } ZSTD_longOffset_e; -typedef enum { ZSTD_p_noPrefetch=0, ZSTD_p_prefetch=1 } ZSTD_prefetch_e; - -FORCE_INLINE_TEMPLATE seq_t -ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets, const ZSTD_prefetch_e prefetch) -{ - seq_t seq; - ZSTD_seqSymbol const llDInfo = seqState->stateLL.table[seqState->stateLL.state]; - ZSTD_seqSymbol const mlDInfo = seqState->stateML.table[seqState->stateML.state]; - ZSTD_seqSymbol const ofDInfo = seqState->stateOffb.table[seqState->stateOffb.state]; - U32 const llBase = llDInfo.baseValue; - U32 const mlBase = mlDInfo.baseValue; - U32 const ofBase = ofDInfo.baseValue; - BYTE const llBits = llDInfo.nbAdditionalBits; - BYTE const mlBits = mlDInfo.nbAdditionalBits; - BYTE const ofBits = ofDInfo.nbAdditionalBits; - BYTE const totalBits = llBits+mlBits+ofBits; - - /* sequence */ - { size_t offset; - if (ofBits > 1) { - ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1); - ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 5); - assert(ofBits <= MaxOff); - if (MEM_32bits() && longOffsets && (ofBits >= STREAM_ACCUMULATOR_MIN_32)) { - U32 const extraBits = ofBits - MIN(ofBits, 32 - seqState->DStream.bitsConsumed); - offset = ofBase + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits); - BIT_reloadDStream(&seqState->DStream); - if (extraBits) offset += BIT_readBitsFast(&seqState->DStream, extraBits); - assert(extraBits <= LONG_OFFSETS_MAX_EXTRA_BITS_32); /* to avoid another reload */ - } else { - offset = ofBase + BIT_readBitsFast(&seqState->DStream, ofBits/*>0*/); /* <= (ZSTD_WINDOWLOG_MAX-1) bits */ - if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); - } - seqState->prevOffset[2] = seqState->prevOffset[1]; - seqState->prevOffset[1] = seqState->prevOffset[0]; - seqState->prevOffset[0] = offset; - } else { - U32 const ll0 = (llBase == 0); - if (LIKELY((ofBits == 0))) { - if (LIKELY(!ll0)) - offset = seqState->prevOffset[0]; - else { - offset = seqState->prevOffset[1]; - seqState->prevOffset[1] = seqState->prevOffset[0]; - seqState->prevOffset[0] = offset; - } - } else { - offset = ofBase + ll0 + BIT_readBitsFast(&seqState->DStream, 1); - { size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset]; - temp += !temp; /* 0 is not valid; input is corrupted; force offset to 1 */ - if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1]; - seqState->prevOffset[1] = seqState->prevOffset[0]; - seqState->prevOffset[0] = offset = temp; - } } } - seq.offset = offset; - } - - seq.matchLength = mlBase; - if (mlBits > 0) - seq.matchLength += BIT_readBitsFast(&seqState->DStream, mlBits/*>0*/); - - if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32)) - BIT_reloadDStream(&seqState->DStream); - if (MEM_64bits() && UNLIKELY(totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog))) - BIT_reloadDStream(&seqState->DStream); - /* Ensure there are enough bits to read the rest of data in 64-bit mode. */ - ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64); - - seq.litLength = llBase; - if (llBits > 0) - seq.litLength += BIT_readBitsFast(&seqState->DStream, llBits/*>0*/); - - if (MEM_32bits()) - BIT_reloadDStream(&seqState->DStream); - - DEBUGLOG(6, "seq: litL=%u, matchL=%u, offset=%u", - (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset); - - if (prefetch == ZSTD_p_prefetch) { - size_t const pos = seqState->pos + seq.litLength; - const BYTE* const matchBase = (seq.offset > pos) ? seqState->dictEnd : seqState->prefixStart; - seq.match = matchBase + pos - seq.offset; /* note : this operation can overflow when seq.offset is really too large, which can only happen when input is corrupted. - * No consequence though : no memory access will occur, offset is only used for prefetching */ - seqState->pos = pos + seq.matchLength; - } - - /* ANS state update - * gcc-9.0.0 does 2.5% worse with ZSTD_updateFseStateWithDInfo(). - * clang-9.2.0 does 7% worse with ZSTD_updateFseState(). - * Naturally it seems like ZSTD_updateFseStateWithDInfo() should be the - * better option, so it is the default for other compilers. But, if you - * measure that it is worse, please put up a pull request. - */ - { -#if defined(__GNUC__) && !defined(__clang__) - const int kUseUpdateFseState = 1; -#else - const int kUseUpdateFseState = 0; -#endif - if (kUseUpdateFseState) { - ZSTD_updateFseState(&seqState->stateLL, &seqState->DStream); /* <= 9 bits */ - ZSTD_updateFseState(&seqState->stateML, &seqState->DStream); /* <= 9 bits */ - if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */ - ZSTD_updateFseState(&seqState->stateOffb, &seqState->DStream); /* <= 8 bits */ - } else { - ZSTD_updateFseStateWithDInfo(&seqState->stateLL, &seqState->DStream, llDInfo); /* <= 9 bits */ - ZSTD_updateFseStateWithDInfo(&seqState->stateML, &seqState->DStream, mlDInfo); /* <= 9 bits */ - if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */ - ZSTD_updateFseStateWithDInfo(&seqState->stateOffb, &seqState->DStream, ofDInfo); /* <= 8 bits */ - } - } - - return seq; -} - -#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION -static int ZSTD_dictionaryIsActive(ZSTD_DCtx const* dctx, BYTE const* prefixStart, BYTE const* oLitEnd) -{ - size_t const windowSize = dctx->fParams.windowSize; - /* No dictionary used. */ - if (dctx->dictContentEndForFuzzing == NULL) return 0; - /* Dictionary is our prefix. */ - if (prefixStart == dctx->dictContentBeginForFuzzing) return 1; - /* Dictionary is not our ext-dict. */ - if (dctx->dictEnd != dctx->dictContentEndForFuzzing) return 0; - /* Dictionary is not within our window size. */ - if ((size_t)(oLitEnd - prefixStart) >= windowSize) return 0; - /* Dictionary is active. */ - return 1; -} - -MEM_STATIC void ZSTD_assertValidSequence( - ZSTD_DCtx const* dctx, - BYTE const* op, BYTE const* oend, - seq_t const seq, - BYTE const* prefixStart, BYTE const* virtualStart) -{ - size_t const windowSize = dctx->fParams.windowSize; - size_t const sequenceSize = seq.litLength + seq.matchLength; - BYTE const* const oLitEnd = op + seq.litLength; - DEBUGLOG(6, "Checking sequence: litL=%u matchL=%u offset=%u", - (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset); - assert(op <= oend); - assert((size_t)(oend - op) >= sequenceSize); - assert(sequenceSize <= ZSTD_BLOCKSIZE_MAX); - if (ZSTD_dictionaryIsActive(dctx, prefixStart, oLitEnd)) { - size_t const dictSize = (size_t)((char const*)dctx->dictContentEndForFuzzing - (char const*)dctx->dictContentBeginForFuzzing); - /* Offset must be within the dictionary. */ - assert(seq.offset <= (size_t)(oLitEnd - virtualStart)); - assert(seq.offset <= windowSize + dictSize); - } else { - /* Offset must be within our window. */ - assert(seq.offset <= windowSize); - } -} -#endif - -#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG -FORCE_INLINE_TEMPLATE size_t -DONT_VECTORIZE -ZSTD_decompressSequences_body( ZSTD_DCtx* dctx, - void* dst, size_t maxDstSize, - const void* seqStart, size_t seqSize, int nbSeq, - const ZSTD_longOffset_e isLongOffset, - const int frame) -{ - const BYTE* ip = (const BYTE*)seqStart; - const BYTE* const iend = ip + seqSize; - BYTE* const ostart = (BYTE* const)dst; - BYTE* const oend = ostart + maxDstSize; - BYTE* op = ostart; - const BYTE* litPtr = dctx->litPtr; - const BYTE* const litEnd = litPtr + dctx->litSize; - const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart); - const BYTE* const vBase = (const BYTE*) (dctx->virtualStart); - const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd); - DEBUGLOG(5, "ZSTD_decompressSequences_body"); - (void)frame; - - /* Regen sequences */ - if (nbSeq) { - seqState_t seqState; - size_t error = 0; - dctx->fseEntropy = 1; - { U32 i; for (i=0; ientropy.rep[i]; } - RETURN_ERROR_IF( - ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend-ip)), - corruption_detected, ""); - ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr); - ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr); - ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr); - assert(dst != NULL); - - ZSTD_STATIC_ASSERT( - BIT_DStream_unfinished < BIT_DStream_completed && - BIT_DStream_endOfBuffer < BIT_DStream_completed && - BIT_DStream_completed < BIT_DStream_overflow); - -#if defined(__GNUC__) && defined(__x86_64__) - /* Align the decompression loop to 32 + 16 bytes. - * - * zstd compiled with gcc-9 on an Intel i9-9900k shows 10% decompression - * speed swings based on the alignment of the decompression loop. This - * performance swing is caused by parts of the decompression loop falling - * out of the DSB. The entire decompression loop should fit in the DSB, - * when it can't we get much worse performance. You can measure if you've - * hit the good case or the bad case with this perf command for some - * compressed file test.zst: - * - * perf stat -e cycles -e instructions -e idq.all_dsb_cycles_any_uops \ - * -e idq.all_mite_cycles_any_uops -- ./zstd -tq test.zst - * - * If you see most cycles served out of the MITE you've hit the bad case. - * If you see most cycles served out of the DSB you've hit the good case. - * If it is pretty even then you may be in an okay case. - * - * I've been able to reproduce this issue on the following CPUs: - * - Kabylake: Macbook Pro (15-inch, 2019) 2.4 GHz Intel Core i9 - * Use Instruments->Counters to get DSB/MITE cycles. - * I never got performance swings, but I was able to - * go from the good case of mostly DSB to half of the - * cycles served from MITE. - * - Coffeelake: Intel i9-9900k - * - * I haven't been able to reproduce the instability or DSB misses on any - * of the following CPUS: - * - Haswell - * - Broadwell: Intel(R) Xeon(R) CPU E5-2680 v4 @ 2.40GH - * - Skylake - * - * If you are seeing performance stability this script can help test. - * It tests on 4 commits in zstd where I saw performance change. - * - * https://gist.github.com/terrelln/9889fc06a423fd5ca6e99351564473f4 - */ - __asm__(".p2align 5"); - __asm__("nop"); - __asm__(".p2align 4"); -#endif - for ( ; ; ) { - seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset, ZSTD_p_noPrefetch); - size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litEnd, prefixStart, vBase, dictEnd); -#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE) - assert(!ZSTD_isError(oneSeqSize)); - if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase); -#endif - DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize); - BIT_reloadDStream(&(seqState.DStream)); - /* gcc and clang both don't like early returns in this loop. - * gcc doesn't like early breaks either. - * Instead save an error and report it at the end. - * When there is an error, don't increment op, so we don't - * overwrite. - */ - if (UNLIKELY(ZSTD_isError(oneSeqSize))) error = oneSeqSize; - else op += oneSeqSize; - if (UNLIKELY(!--nbSeq)) break; - } - - /* check if reached exact end */ - DEBUGLOG(5, "ZSTD_decompressSequences_body: after decode loop, remaining nbSeq : %i", nbSeq); - if (ZSTD_isError(error)) return error; - RETURN_ERROR_IF(nbSeq, corruption_detected, ""); - RETURN_ERROR_IF(BIT_reloadDStream(&seqState.DStream) < BIT_DStream_completed, corruption_detected, ""); - /* save reps for next block */ - { U32 i; for (i=0; ientropy.rep[i] = (U32)(seqState.prevOffset[i]); } - } - - /* last literal segment */ - { size_t const lastLLSize = litEnd - litPtr; - RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, ""); - if (op != NULL) { - memcpy(op, litPtr, lastLLSize); - op += lastLLSize; - } - } - - return op-ostart; -} - -static size_t -ZSTD_decompressSequences_default(ZSTD_DCtx* dctx, - void* dst, size_t maxDstSize, - const void* seqStart, size_t seqSize, int nbSeq, - const ZSTD_longOffset_e isLongOffset, - const int frame) -{ - return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); -} -#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */ - -#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT -FORCE_INLINE_TEMPLATE size_t -ZSTD_decompressSequencesLong_body( - ZSTD_DCtx* dctx, - void* dst, size_t maxDstSize, - const void* seqStart, size_t seqSize, int nbSeq, - const ZSTD_longOffset_e isLongOffset, - const int frame) -{ - const BYTE* ip = (const BYTE*)seqStart; - const BYTE* const iend = ip + seqSize; - BYTE* const ostart = (BYTE* const)dst; - BYTE* const oend = ostart + maxDstSize; - BYTE* op = ostart; - const BYTE* litPtr = dctx->litPtr; - const BYTE* const litEnd = litPtr + dctx->litSize; - const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart); - const BYTE* const dictStart = (const BYTE*) (dctx->virtualStart); - const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd); - (void)frame; - - /* Regen sequences */ - if (nbSeq) { -#define STORED_SEQS 4 -#define STORED_SEQS_MASK (STORED_SEQS-1) -#define ADVANCED_SEQS 4 - seq_t sequences[STORED_SEQS]; - int const seqAdvance = MIN(nbSeq, ADVANCED_SEQS); - seqState_t seqState; - int seqNb; - dctx->fseEntropy = 1; - { int i; for (i=0; ientropy.rep[i]; } - seqState.prefixStart = prefixStart; - seqState.pos = (size_t)(op-prefixStart); - seqState.dictEnd = dictEnd; - assert(dst != NULL); - assert(iend >= ip); - RETURN_ERROR_IF( - ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend-ip)), - corruption_detected, ""); - ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr); - ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr); - ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr); - - /* prepare in advance */ - for (seqNb=0; (BIT_reloadDStream(&seqState.DStream) <= BIT_DStream_completed) && (seqNbentropy.rep[i] = (U32)(seqState.prevOffset[i]); } - } - - /* last literal segment */ - { size_t const lastLLSize = litEnd - litPtr; - RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, ""); - if (op != NULL) { - memcpy(op, litPtr, lastLLSize); - op += lastLLSize; - } - } - - return op-ostart; -} - -static size_t -ZSTD_decompressSequencesLong_default(ZSTD_DCtx* dctx, - void* dst, size_t maxDstSize, - const void* seqStart, size_t seqSize, int nbSeq, - const ZSTD_longOffset_e isLongOffset, - const int frame) -{ - return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); -} -#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */ - - - -#if DYNAMIC_BMI2 - -#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG -#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */ - -#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT -static TARGET_ATTRIBUTE("bmi2") size_t -ZSTD_decompressSequencesLong_bmi2(ZSTD_DCtx* dctx, - void* dst, size_t maxDstSize, - const void* seqStart, size_t seqSize, int nbSeq, - const ZSTD_longOffset_e isLongOffset, - const int frame) -{ - return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); -} -#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */ - -#endif /* DYNAMIC_BMI2 */ - -typedef size_t (*ZSTD_decompressSequences_t)( - ZSTD_DCtx* dctx, - void* dst, size_t maxDstSize, - const void* seqStart, size_t seqSize, int nbSeq, - const ZSTD_longOffset_e isLongOffset, - const int frame); - -#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG -static size_t -ZSTD_decompressSequences(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, - const void* seqStart, size_t seqSize, int nbSeq, - const ZSTD_longOffset_e isLongOffset, - const int frame) -{ - DEBUGLOG(5, "ZSTD_decompressSequences"); - return ZSTD_decompressSequences_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); -} -#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */ - - -#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT -/* ZSTD_decompressSequencesLong() : - * decompression function triggered when a minimum share of offsets is considered "long", - * aka out of cache. - * note : "long" definition seems overloaded here, sometimes meaning "wider than bitstream register", and sometimes meaning "farther than memory cache distance". - * This function will try to mitigate main memory latency through the use of prefetching */ -static size_t -ZSTD_decompressSequencesLong(ZSTD_DCtx* dctx, - void* dst, size_t maxDstSize, - const void* seqStart, size_t seqSize, int nbSeq, - const ZSTD_longOffset_e isLongOffset, - const int frame) -{ - DEBUGLOG(5, "ZSTD_decompressSequencesLong"); -#if DYNAMIC_BMI2 - if (dctx->bmi2) { - return ZSTD_decompressSequencesLong_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); - } -#endif - return ZSTD_decompressSequencesLong_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); -} -#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */ - - - -#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \ - !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG) -/* ZSTD_getLongOffsetsShare() : - * condition : offTable must be valid - * @return : "share" of long offsets (arbitrarily defined as > (1<<23)) - * compared to maximum possible of (1< 22) total += 1; - } - - assert(tableLog <= OffFSELog); - total <<= (OffFSELog - tableLog); /* scale to OffFSELog */ - - return total; -} -#endif - -size_t -ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, const int frame) -{ /* blockType == blockCompressed */ - const BYTE* ip = (const BYTE*)src; - /* isLongOffset must be true if there are long offsets. - * Offsets are long if they are larger than 2^STREAM_ACCUMULATOR_MIN. - * We don't expect that to be the case in 64-bit mode. - * In block mode, window size is not known, so we have to be conservative. - * (note: but it could be evaluated from current-lowLimit) - */ - ZSTD_longOffset_e const isLongOffset = (ZSTD_longOffset_e)(MEM_32bits() && (!frame || (dctx->fParams.windowSize > (1ULL << STREAM_ACCUMULATOR_MIN)))); - DEBUGLOG(5, "ZSTD_decompressBlock_internal (size : %u)", (U32)srcSize); - - RETURN_ERROR_IF(srcSize >= ZSTD_BLOCKSIZE_MAX, srcSize_wrong, ""); - - /* Decode literals section */ - { size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize); - DEBUGLOG(5, "ZSTD_decodeLiteralsBlock : %u", (U32)litCSize); - if (ZSTD_isError(litCSize)) return litCSize; - ip += litCSize; - srcSize -= litCSize; - } - - /* Build Decoding Tables */ - { - /* These macros control at build-time which decompressor implementation - * we use. If neither is defined, we do some inspection and dispatch at - * runtime. - */ -#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \ - !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG) - int usePrefetchDecoder = dctx->ddictIsCold; -#endif - int nbSeq; - size_t const seqHSize = ZSTD_decodeSeqHeaders(dctx, &nbSeq, ip, srcSize); - if (ZSTD_isError(seqHSize)) return seqHSize; - ip += seqHSize; - srcSize -= seqHSize; - - RETURN_ERROR_IF(dst == NULL && nbSeq > 0, dstSize_tooSmall, "NULL not handled"); - -#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \ - !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG) - if ( !usePrefetchDecoder - && (!frame || (dctx->fParams.windowSize > (1<<24))) - && (nbSeq>ADVANCED_SEQS) ) { /* could probably use a larger nbSeq limit */ - U32 const shareLongOffsets = ZSTD_getLongOffsetsShare(dctx->OFTptr); - U32 const minShare = MEM_64bits() ? 7 : 20; /* heuristic values, correspond to 2.73% and 7.81% */ - usePrefetchDecoder = (shareLongOffsets >= minShare); - } -#endif - - dctx->ddictIsCold = 0; - -#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \ - !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG) - if (usePrefetchDecoder) -#endif -#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT - return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame); -#endif - -#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG - /* else */ - return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame); -#endif - } -} - - -void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst) -{ - if (dst != dctx->previousDstEnd) { /* not contiguous */ - dctx->dictEnd = dctx->previousDstEnd; - dctx->virtualStart = (const char*)dst - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->prefixStart)); - dctx->prefixStart = dst; - dctx->previousDstEnd = dst; - } -} - - -size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize) -{ - size_t dSize; - ZSTD_checkContinuity(dctx, dst); - dSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, /* frame */ 0); - dctx->previousDstEnd = (char*)dst + dSize; - return dSize; -} - -} - -// LICENSE_CHANGE_END - -//===----------------------------------------------------------------------===// -// DuckDB -// -// parquet_timestamp.hpp -// -// -//===----------------------------------------------------------------------===// - - - -#include "duckdb.hpp" - -namespace duckdb { - -struct Int96 { - uint32_t value[3]; -}; - -int64_t ImpalaTimestampToNanoseconds(const Int96 &impala_timestamp); -timestamp_t ImpalaTimestampToTimestamp(const Int96 &raw_ts); -Int96 TimestampToImpalaTimestamp(timestamp_t &ts); -timestamp_t ParquetTimestampMicrosToTimestamp(const int64_t &raw_ts); -timestamp_t ParquetTimestampMsToTimestamp(const int64_t &raw_ts); -date_t ParquetIntToDate(const int32_t &raw_date); -dtime_t ParquetIntToTime(const int64_t &raw_time); - -} // namespace duckdb - - - -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #5 -// See the end of this file for a list - - - -#include -#include -#include - -namespace duckdb { - -enum class UnicodeType { INVALID, ASCII, UNICODE }; -enum class UnicodeInvalidReason { BYTE_MISMATCH, NULL_BYTE }; - -class Utf8Proc { -public: - //! Distinguishes ASCII, Valid UTF8 and Invalid UTF8 strings - static UnicodeType Analyze(const char *s, size_t len, UnicodeInvalidReason *invalid_reason = nullptr, size_t *invalid_pos = nullptr); - //! Performs UTF NFC normalization of string, return value needs to be free'd - static char* Normalize(const char* s, size_t len); - //! Returns whether or not the UTF8 string is valid - static bool IsValid(const char *s, size_t len); - //! Returns the position (in bytes) of the next grapheme cluster - static size_t NextGraphemeCluster(const char *s, size_t len, size_t pos); - //! Returns the position (in bytes) of the previous grapheme cluster - static size_t PreviousGraphemeCluster(const char *s, size_t len, size_t pos); - - //! Transform a codepoint to utf8 and writes it to "c", sets "sz" to the size of the codepoint - static bool CodepointToUtf8(int cp, int &sz, char *c); - //! Returns the codepoint length in bytes when encoded in UTF8 - static int CodepointLength(int cp); - //! Transform a UTF8 string to a codepoint; returns the codepoint and writes the length of the codepoint (in UTF8) to sz - static int32_t UTF8ToCodepoint(const char *c, int &sz); - static size_t RenderWidth(const char *s, size_t len, size_t pos); - -}; - -} - - -// LICENSE_CHANGE_END - - - -//===----------------------------------------------------------------------===// -// DuckDB -// -// boolean_column_reader.hpp -// -// -//===----------------------------------------------------------------------===// - - - - -//===----------------------------------------------------------------------===// -// DuckDB -// -// templated__column_reader.hpp -// -// -//===----------------------------------------------------------------------===// - - - - - -namespace duckdb { - -template -struct TemplatedParquetValueConversion { - static VALUE_TYPE DictRead(ByteBuffer &dict, uint32_t &offset, ColumnReader &reader) { - D_ASSERT(offset < dict.len / sizeof(VALUE_TYPE)); - return ((VALUE_TYPE *)dict.ptr)[offset]; - } - - static VALUE_TYPE PlainRead(ByteBuffer &plain_data, ColumnReader &reader) { - return plain_data.read(); - } - - static void PlainSkip(ByteBuffer &plain_data, ColumnReader &reader) { - plain_data.inc(sizeof(VALUE_TYPE)); - } -}; - -template -class TemplatedColumnReader : public ColumnReader { -public: - TemplatedColumnReader(ParquetReader &reader, LogicalType type_p, const SchemaElement &schema_p, idx_t schema_idx_p, - idx_t max_define_p, idx_t max_repeat_p) - : ColumnReader(reader, move(type_p), schema_p, schema_idx_p, max_define_p, max_repeat_p) {}; - - shared_ptr dict; - -public: - void Dictionary(shared_ptr data, idx_t num_entries) override { - dict = move(data); - } - - void Offsets(uint32_t *offsets, uint8_t *defines, uint64_t num_values, parquet_filter_t &filter, - idx_t result_offset, Vector &result) override { - auto result_ptr = FlatVector::GetData(result); - auto &result_mask = FlatVector::Validity(result); - - idx_t offset_idx = 0; - for (idx_t row_idx = 0; row_idx < num_values; row_idx++) { - if (HasDefines() && defines[row_idx + result_offset] != max_define) { - result_mask.SetInvalid(row_idx + result_offset); - continue; - } - if (filter[row_idx + result_offset]) { - VALUE_TYPE val = VALUE_CONVERSION::DictRead(*dict, offsets[offset_idx++], *this); - result_ptr[row_idx + result_offset] = val; - } else { - offset_idx++; - } - } - } - - void Plain(shared_ptr plain_data, uint8_t *defines, uint64_t num_values, parquet_filter_t &filter, - idx_t result_offset, Vector &result) override { - auto result_ptr = FlatVector::GetData(result); - auto &result_mask = FlatVector::Validity(result); - for (idx_t row_idx = 0; row_idx < num_values; row_idx++) { - if (HasDefines() && defines[row_idx + result_offset] != max_define) { - result_mask.SetInvalid(row_idx + result_offset); - continue; - } - if (filter[row_idx + result_offset]) { - VALUE_TYPE val = VALUE_CONVERSION::PlainRead(*plain_data, *this); - result_ptr[row_idx + result_offset] = val; - } else { // there is still some data there that we have to skip over - VALUE_CONVERSION::PlainSkip(*plain_data, *this); - } - } - } -}; - -template -struct CallbackParquetValueConversion { - static DUCKDB_PHYSICAL_TYPE DictRead(ByteBuffer &dict, uint32_t &offset, ColumnReader &reader) { - return TemplatedParquetValueConversion::DictRead(dict, offset, reader); - } - - static DUCKDB_PHYSICAL_TYPE PlainRead(ByteBuffer &plain_data, ColumnReader &reader) { - return FUNC(plain_data.read()); - } - - static void PlainSkip(ByteBuffer &plain_data, ColumnReader &reader) { - plain_data.inc(sizeof(PARQUET_PHYSICAL_TYPE)); - } -}; - -} // namespace duckdb - - -namespace duckdb { - -struct BooleanParquetValueConversion; - -class BooleanColumnReader : public TemplatedColumnReader { -public: - BooleanColumnReader(ParquetReader &reader, LogicalType type_p, const SchemaElement &schema_p, idx_t schema_idx_p, - idx_t max_define_p, idx_t max_repeat_p) - : TemplatedColumnReader(reader, move(type_p), schema_p, schema_idx_p, - max_define_p, max_repeat_p), - byte_pos(0) {}; - - uint8_t byte_pos; - - void InitializeRead(const std::vector &columns, TProtocol &protocol_p) override { - byte_pos = 0; - TemplatedColumnReader::InitializeRead(columns, protocol_p); - } -}; - -struct BooleanParquetValueConversion { - static bool DictRead(ByteBuffer &dict, uint32_t &offset, ColumnReader &reader) { - throw std::runtime_error("Dicts for booleans make no sense"); - } - - static bool PlainRead(ByteBuffer &plain_data, ColumnReader &reader) { - plain_data.available(1); - auto &byte_pos = ((BooleanColumnReader &)reader).byte_pos; - bool ret = (*plain_data.ptr >> byte_pos) & 1; - byte_pos++; - if (byte_pos == 8) { - byte_pos = 0; - plain_data.inc(1); - } - return ret; - } - - static void PlainSkip(ByteBuffer &plain_data, ColumnReader &reader) { - PlainRead(plain_data, reader); - } -}; - -} // namespace duckdb - -//===----------------------------------------------------------------------===// -// DuckDB -// -// cast_column_reader.hpp -// -// -//===----------------------------------------------------------------------===// - - - - - - -namespace duckdb { - -//! A column reader that represents a cast over a child reader -class CastColumnReader : public ColumnReader { -public: - CastColumnReader(unique_ptr child_reader, LogicalType target_type); - - unique_ptr child_reader; - DataChunk intermediate_chunk; - -public: - unique_ptr Stats(const std::vector &columns) override; - void InitializeRead(const std::vector &columns, TProtocol &protocol_p) override; - - idx_t Read(uint64_t num_values, parquet_filter_t &filter, uint8_t *define_out, uint8_t *repeat_out, - Vector &result) override; - - void Skip(idx_t num_values) override; - idx_t GroupRowsAvailable() override; - - uint64_t TotalCompressedSize() override { - return child_reader->TotalCompressedSize(); - } - - idx_t FileOffset() const override { - return child_reader->FileOffset(); - } - - void RegisterPrefetch(ThriftFileTransport &transport, bool allow_merge) override { - child_reader->RegisterPrefetch(transport, allow_merge); - } -}; - -} // namespace duckdb - -//===----------------------------------------------------------------------===// -// DuckDB -// -// callback_column_reader.hpp -// -// -//===----------------------------------------------------------------------===// - - - - - - - -namespace duckdb { - -template -class CallbackColumnReader - : public TemplatedColumnReader> { - -public: - CallbackColumnReader(ParquetReader &reader, LogicalType type_p, const SchemaElement &schema_p, idx_t file_idx_p, - idx_t max_define_p, idx_t max_repeat_p) - : TemplatedColumnReader>( - reader, move(type_p), schema_p, file_idx_p, max_define_p, max_repeat_p) { - } - -protected: - void Dictionary(shared_ptr dictionary_data, idx_t num_entries) { - this->dict = make_shared(this->reader.allocator, num_entries * sizeof(DUCKDB_PHYSICAL_TYPE)); - auto dict_ptr = (DUCKDB_PHYSICAL_TYPE *)this->dict->ptr; - for (idx_t i = 0; i < num_entries; i++) { - dict_ptr[i] = FUNC(dictionary_data->read()); - } - } -}; - -} // namespace duckdb - -//===----------------------------------------------------------------------===// -// DuckDB -// -// parquet_decimal_utils.hpp -// -// -//===----------------------------------------------------------------------===// - - - - - - -namespace duckdb { - -class ParquetDecimalUtils { -public: - template - static PHYSICAL_TYPE ReadDecimalValue(const_data_ptr_t pointer, idx_t size) { - D_ASSERT(size <= sizeof(PHYSICAL_TYPE)); - PHYSICAL_TYPE res = 0; - - auto res_ptr = (uint8_t *)&res; - bool positive = (*pointer & 0x80) == 0; - - // numbers are stored as two's complement so some muckery is required - for (idx_t i = 0; i < size; i++) { - auto byte = *(pointer + (size - i - 1)); - res_ptr[i] = positive ? byte : byte ^ 0xFF; - } - if (!positive) { - res += 1; - return -res; - } - return res; - } - - static unique_ptr CreateReader(ParquetReader &reader, const LogicalType &type_p, - const SchemaElement &schema_p, idx_t file_idx_p, idx_t max_define, - idx_t max_repeat); -}; - -} // namespace duckdb - -//===----------------------------------------------------------------------===// -// DuckDB -// -// list_column_reader.hpp -// -// -//===----------------------------------------------------------------------===// - - - - - - -namespace duckdb { - -class ListColumnReader : public ColumnReader { -public: - ListColumnReader(ParquetReader &reader, LogicalType type_p, const SchemaElement &schema_p, idx_t schema_idx_p, - idx_t max_define_p, idx_t max_repeat_p, unique_ptr child_column_reader_p); - - idx_t Read(uint64_t num_values, parquet_filter_t &filter, uint8_t *define_out, uint8_t *repeat_out, - Vector &result_out) override; - - void ApplyPendingSkips(idx_t num_values) override; - - void InitializeRead(const std::vector &columns, TProtocol &protocol_p) override { - child_column_reader->InitializeRead(columns, protocol_p); - } - - idx_t GroupRowsAvailable() override { - return child_column_reader->GroupRowsAvailable() + overflow_child_count; - } - - uint64_t TotalCompressedSize() override { - return child_column_reader->TotalCompressedSize(); - } - - void RegisterPrefetch(ThriftFileTransport &transport, bool allow_merge) override { - child_column_reader->RegisterPrefetch(transport, allow_merge); - } - -private: - unique_ptr child_column_reader; - ResizeableBuffer child_defines; - ResizeableBuffer child_repeats; - uint8_t *child_defines_ptr; - uint8_t *child_repeats_ptr; - - VectorCache read_cache; - Vector read_vector; - - parquet_filter_t child_filter; - - idx_t overflow_child_count; -}; - -} // namespace duckdb - -//===----------------------------------------------------------------------===// -// DuckDB -// -// string_column_reader.hpp -// -// -//===----------------------------------------------------------------------===// - - - - - -namespace duckdb { - -struct StringParquetValueConversion { - static string_t DictRead(ByteBuffer &dict, uint32_t &offset, ColumnReader &reader); - - static string_t PlainRead(ByteBuffer &plain_data, ColumnReader &reader); - - static void PlainSkip(ByteBuffer &plain_data, ColumnReader &reader); -}; - -class StringColumnReader : public TemplatedColumnReader { -public: - StringColumnReader(ParquetReader &reader, LogicalType type_p, const SchemaElement &schema_p, idx_t schema_idx_p, - idx_t max_define_p, idx_t max_repeat_p); - - unique_ptr dict_strings; - idx_t fixed_width_string_length; - -public: - void Dictionary(shared_ptr dictionary_data, idx_t num_entries) override; - - uint32_t VerifyString(const char *str_data, uint32_t str_len); - -protected: - void DictReference(Vector &result) override; - void PlainReference(shared_ptr plain_data, Vector &result) override; -}; - -} // namespace duckdb - -//===----------------------------------------------------------------------===// -// DuckDB -// -// struct_column_reader.hpp -// -// -//===----------------------------------------------------------------------===// - - - - - - -namespace duckdb { - -class StructColumnReader : public ColumnReader { -public: - StructColumnReader(ParquetReader &reader, LogicalType type_p, const SchemaElement &schema_p, idx_t schema_idx_p, - idx_t max_define_p, idx_t max_repeat_p, vector> child_readers_p); - - vector> child_readers; - -public: - ColumnReader *GetChildReader(idx_t child_idx); - - void InitializeRead(const std::vector &columns, TProtocol &protocol_p) override; - - idx_t Read(uint64_t num_values, parquet_filter_t &filter, uint8_t *define_out, uint8_t *repeat_out, - Vector &result) override; - - void Skip(idx_t num_values) override; - idx_t GroupRowsAvailable() override; - uint64_t TotalCompressedSize() override; - void RegisterPrefetch(ThriftFileTransport &transport, bool allow_merge) override; -}; - -} // namespace duckdb - - - - - - -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #6 -// See the end of this file for a list - -//===----------------------------------------------------------------------===// -// DuckDB -// -// miniz_wrapper.hpp -// -// -//===----------------------------------------------------------------------===// - - - - - -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #6 -// See the end of this file for a list - -/* miniz.c 2.0.8 - public domain deflate/inflate, zlib-subset, ZIP reading/writing/appending, PNG writing - See "unlicense" statement at the end of this file. - Rich Geldreich , last updated Oct. 13, 2013 - Implements RFC 1950: http://www.ietf.org/rfc/rfc1950.txt and RFC 1951: http://www.ietf.org/rfc/rfc1951.txt - - Most API's defined in miniz.c are optional. For example, to disable the archive related functions just define - MINIZ_NO_ARCHIVE_APIS, or to get rid of all stdio usage define MINIZ_NO_STDIO (see the list below for more macros). - - * Low-level Deflate/Inflate implementation notes: - - Compression: Use the "tdefl" API's. The compressor supports raw, static, and dynamic blocks, lazy or - greedy parsing, match length filtering, RLE-only, and Huffman-only streams. It performs and compresses - approximately as well as zlib. - - Decompression: Use the "tinfl" API's. The entire decompressor is implemented as a single function - coroutine: see tinfl_decompress(). It supports decompression into a 32KB (or larger power of 2) wrapping buffer, or into a memory - block large enough to hold the entire file. - - The low-level tdefl/tinfl API's do not make any use of dynamic memory allocation. - - * zlib-style API notes: - - miniz.c implements a fairly large subset of zlib. There's enough functionality present for it to be a drop-in - zlib replacement in many apps: - The z_stream struct, optional memory allocation callbacks - deflateInit/deflateInit2/deflate/deflateReset/deflateEnd/deflateBound - inflateInit/inflateInit2/inflate/inflateEnd - compress, compress2, compressBound, uncompress - CRC-32, Adler-32 - Using modern, minimal code size, CPU cache friendly routines. - Supports raw deflate streams or standard zlib streams with adler-32 checking. - - Limitations: - The callback API's are not implemented yet. No support for gzip headers or zlib static dictionaries. - I've tried to closely emulate zlib's various flavors of stream flushing and return status codes, but - there are no guarantees that miniz.c pulls this off perfectly. - - * PNG writing: See the tdefl_write_image_to_png_file_in_memory() function, originally written by - Alex Evans. Supports 1-4 bytes/pixel images. - - * ZIP archive API notes: - - The ZIP archive API's where designed with simplicity and efficiency in mind, with just enough abstraction to - get the job done with minimal fuss. There are simple API's to retrieve file information, read files from - existing archives, create new archives, append new files to existing archives, or clone archive data from - one archive to another. It supports archives located in memory or the heap, on disk (using stdio.h), - or you can specify custom file read/write callbacks. - - - Archive reading: Just call this function to read a single file from a disk archive: - - void *mz_zip_extract_archive_file_to_heap(const char *pZip_filename, const char *pArchive_name, - size_t *pSize, mz_uint zip_flags); - - For more complex cases, use the "mz_zip_reader" functions. Upon opening an archive, the entire central - directory is located and read as-is into memory, and subsequent file access only occurs when reading individual files. - - - Archives file scanning: The simple way is to use this function to scan a loaded archive for a specific file: - - int mz_zip_reader_locate_file(mz_zip_archive *pZip, const char *pName, const char *pComment, mz_uint flags); - - The locate operation can optionally check file comments too, which (as one example) can be used to identify - multiple versions of the same file in an archive. This function uses a simple linear search through the central - directory, so it's not very fast. - - Alternately, you can iterate through all the files in an archive (using mz_zip_reader_get_num_files()) and - retrieve detailed info on each file by calling mz_zip_reader_file_stat(). - - - Archive creation: Use the "mz_zip_writer" functions. The ZIP writer immediately writes compressed file data - to disk and builds an exact image of the central directory in memory. The central directory image is written - all at once at the end of the archive file when the archive is finalized. - - The archive writer can optionally align each file's local header and file data to any power of 2 alignment, - which can be useful when the archive will be read from optical media. Also, the writer supports placing - arbitrary data blobs at the very beginning of ZIP archives. Archives written using either feature are still - readable by any ZIP tool. - - - Archive appending: The simple way to add a single file to an archive is to call this function: - - mz_bool mz_zip_add_mem_to_archive_file_in_place(const char *pZip_filename, const char *pArchive_name, - const void *pBuf, size_t buf_size, const void *pComment, mz_uint16 comment_size, mz_uint level_and_flags); - - The archive will be created if it doesn't already exist, otherwise it'll be appended to. - Note the appending is done in-place and is not an atomic operation, so if something goes wrong - during the operation it's possible the archive could be left without a central directory (although the local - file headers and file data will be fine, so the archive will be recoverable). - - For more complex archive modification scenarios: - 1. The safest way is to use a mz_zip_reader to read the existing archive, cloning only those bits you want to - preserve into a new archive using using the mz_zip_writer_add_from_zip_reader() function (which compiles the - compressed file data as-is). When you're done, delete the old archive and rename the newly written archive, and - you're done. This is safe but requires a bunch of temporary disk space or heap memory. - - 2. Or, you can convert an mz_zip_reader in-place to an mz_zip_writer using mz_zip_writer_init_from_reader(), - append new files as needed, then finalize the archive which will write an updated central directory to the - original archive. (This is basically what mz_zip_add_mem_to_archive_file_in_place() does.) There's a - possibility that the archive's central directory could be lost with this method if anything goes wrong, though. - - - ZIP archive support limitations: - No zip64 or spanning support. Extraction functions can only handle unencrypted, stored or deflated files. - Requires streams capable of seeking. - - * This is a header file library, like stb_image.c. To get only a header file, either cut and paste the - below header, or create miniz.h, #define MINIZ_HEADER_FILE_ONLY, and then include miniz.c from it. - - * Important: For best perf. be sure to customize the below macros for your target platform: - #define MINIZ_USE_UNALIGNED_LOADS_AND_STORES 1 - #define MINIZ_LITTLE_ENDIAN 1 - #define MINIZ_HAS_64BIT_REGISTERS 1 - - * On platforms using glibc, Be sure to "#define _LARGEFILE64_SOURCE 1" before including miniz.c to ensure miniz - uses the 64-bit variants: fopen64(), stat64(), etc. Otherwise you won't be able to process large files - (i.e. 32-bit stat() fails for me on files > 0x7FFFFFFF bytes). -*/ - - - - - -/* Defines to completely disable specific portions of miniz.c: - If all macros here are defined the only functionality remaining will be CRC-32, adler-32, tinfl, and tdefl. */ - -/* Define MINIZ_NO_STDIO to disable all usage and any functions which rely on stdio for file I/O. */ -#define MINIZ_NO_STDIO - -/* If MINIZ_NO_TIME is specified then the ZIP archive functions will not be able to get the current time, or */ -/* get/set file times, and the C run-time funcs that get/set times won't be called. */ -/* The current downside is the times written to your archives will be from 1979. */ -#define MINIZ_NO_TIME - -/* Define MINIZ_NO_ARCHIVE_APIS to disable all ZIP archive API's. */ -/* #define MINIZ_NO_ARCHIVE_APIS */ - -/* Define MINIZ_NO_ARCHIVE_WRITING_APIS to disable all writing related ZIP archive API's. */ -/* #define MINIZ_NO_ARCHIVE_WRITING_APIS */ - -/* Define MINIZ_NO_ZLIB_APIS to remove all ZLIB-style compression/decompression API's. */ -/*#define MINIZ_NO_ZLIB_APIS */ - -/* Define MINIZ_NO_ZLIB_COMPATIBLE_NAME to disable zlib names, to prevent conflicts against stock zlib. */ -#define MINIZ_NO_ZLIB_COMPATIBLE_NAMES - -/* Define MINIZ_NO_MALLOC to disable all calls to malloc, free, and realloc. - Note if MINIZ_NO_MALLOC is defined then the user must always provide custom user alloc/free/realloc - callbacks to the zlib and archive API's, and a few stand-alone helper API's which don't provide custom user - functions (such as tdefl_compress_mem_to_heap() and tinfl_decompress_mem_to_heap()) won't work. */ -/*#define MINIZ_NO_MALLOC */ - -#if defined(__TINYC__) && (defined(__linux) || defined(__linux__)) -/* TODO: Work around "error: include file 'sys\utime.h' when compiling with tcc on Linux */ -#define MINIZ_NO_TIME -#endif - -#include - - - -#if !defined(MINIZ_NO_TIME) && !defined(MINIZ_NO_ARCHIVE_APIS) -#include -#endif - -#if defined(_M_IX86) || defined(_M_X64) || defined(__i386__) || defined(__i386) || defined(__i486__) || defined(__i486) || defined(i386) || defined(__ia64__) || defined(__x86_64__) -/* MINIZ_X86_OR_X64_CPU is only used to help set the below macros. */ -#define MINIZ_X86_OR_X64_CPU 1 -#else -#define MINIZ_X86_OR_X64_CPU 0 -#endif - -#if (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) || MINIZ_X86_OR_X64_CPU -/* Set MINIZ_LITTLE_ENDIAN to 1 if the processor is little endian. */ -#define MINIZ_LITTLE_ENDIAN 1 -#else -#define MINIZ_LITTLE_ENDIAN 0 -#endif - -#if MINIZ_X86_OR_X64_CPU -/* Set MINIZ_USE_UNALIGNED_LOADS_AND_STORES to 1 on CPU's that permit efficient integer loads and stores from unaligned addresses. */ -#define MINIZ_USE_UNALIGNED_LOADS_AND_STORES 0 // always 0 because alignment -#else -#define MINIZ_USE_UNALIGNED_LOADS_AND_STORES 0 -#endif - -#if defined(_M_X64) || defined(_WIN64) || defined(__MINGW64__) || defined(_LP64) || defined(__LP64__) || defined(__ia64__) || defined(__x86_64__) -/* Set MINIZ_HAS_64BIT_REGISTERS to 1 if operations on 64-bit integers are reasonably fast (and don't involve compiler generated calls to helper functions). */ -#define MINIZ_HAS_64BIT_REGISTERS 1 -#else -#define MINIZ_HAS_64BIT_REGISTERS 0 -#endif - -namespace duckdb_miniz { - -/* ------------------- zlib-style API Definitions. */ - -/* For more compatibility with zlib, miniz.c uses unsigned long for some parameters/struct members. Beware: mz_ulong can be either 32 or 64-bits! */ -typedef unsigned long mz_ulong; - -/* mz_free() internally uses the MZ_FREE() macro (which by default calls free() unless you've modified the MZ_MALLOC macro) to release a block allocated from the heap. */ -void mz_free(void *p); - -#define MZ_ADLER32_INIT (1) -/* mz_adler32() returns the initial adler-32 value to use when called with ptr==NULL. */ -mz_ulong mz_adler32(mz_ulong adler, const unsigned char *ptr, size_t buf_len); - -#define MZ_CRC32_INIT (0) -/* mz_crc32() returns the initial CRC-32 value to use when called with ptr==NULL. */ -mz_ulong mz_crc32(mz_ulong crc, const unsigned char *ptr, size_t buf_len); - -/* Compression strategies. */ -enum { MZ_DEFAULT_STRATEGY = 0, MZ_FILTERED = 1, MZ_HUFFMAN_ONLY = 2, MZ_RLE = 3, MZ_FIXED = 4 }; - -/* Method */ -#define MZ_DEFLATED 8 - -/* Heap allocation callbacks. -Note that mz_alloc_func parameter types purpsosely differ from zlib's: items/size is size_t, not unsigned long. */ -typedef void *(*mz_alloc_func)(void *opaque, size_t items, size_t size); -typedef void (*mz_free_func)(void *opaque, void *address); -typedef void *(*mz_realloc_func)(void *opaque, void *address, size_t items, size_t size); - -/* Compression levels: 0-9 are the standard zlib-style levels, 10 is best possible compression (not zlib compatible, and may be very slow), MZ_DEFAULT_COMPRESSION=MZ_DEFAULT_LEVEL. */ -enum { - MZ_NO_COMPRESSION = 0, - MZ_BEST_SPEED = 1, - MZ_BEST_COMPRESSION = 9, - MZ_UBER_COMPRESSION = 10, - MZ_DEFAULT_LEVEL = 6, - MZ_DEFAULT_COMPRESSION = -1 -}; - -#define MZ_VERSION "10.0.3" -#define MZ_VERNUM 0xA030 -#define MZ_VER_MAJOR 10 -#define MZ_VER_MINOR 0 -#define MZ_VER_REVISION 3 -#define MZ_VER_SUBREVISION 0 - -#ifndef MINIZ_NO_ZLIB_APIS - -/* Flush values. For typical usage you only need MZ_NO_FLUSH and MZ_FINISH. The other values are for advanced use (refer to the zlib docs). */ -enum { MZ_NO_FLUSH = 0, MZ_PARTIAL_FLUSH = 1, MZ_SYNC_FLUSH = 2, MZ_FULL_FLUSH = 3, MZ_FINISH = 4, MZ_BLOCK = 5 }; - -/* Return status codes. MZ_PARAM_ERROR is non-standard. */ -enum { - MZ_OK = 0, - MZ_STREAM_END = 1, - MZ_NEED_DICT = 2, - MZ_ERRNO = -1, - MZ_STREAM_ERROR = -2, - MZ_DATA_ERROR = -3, - MZ_MEM_ERROR = -4, - MZ_BUF_ERROR = -5, - MZ_VERSION_ERROR = -6, - MZ_PARAM_ERROR = -10000 -}; - -/* Window bits */ -#define MZ_DEFAULT_WINDOW_BITS 15 - -struct mz_internal_state; - -/* Compression/decompression stream struct. */ -typedef struct mz_stream_s { - const unsigned char *next_in; /* pointer to next byte to read */ - unsigned int avail_in; /* number of bytes available at next_in */ - mz_ulong total_in; /* total number of bytes consumed so far */ - - unsigned char *next_out; /* pointer to next byte to write */ - unsigned int avail_out; /* number of bytes that can be written to next_out */ - mz_ulong total_out; /* total number of bytes produced so far */ - - char *msg; /* error msg (unused) */ - struct mz_internal_state *state; /* internal state, allocated by zalloc/zfree */ - - mz_alloc_func zalloc; /* optional heap allocation function (defaults to malloc) */ - mz_free_func zfree; /* optional heap free function (defaults to free) */ - void *opaque; /* heap alloc function user pointer */ - - int data_type; /* data_type (unused) */ - mz_ulong adler; /* adler32 of the source or uncompressed data */ - mz_ulong reserved; /* not used */ -} mz_stream; - -typedef mz_stream *mz_streamp; - -/* Returns the version string of miniz.c. */ -const char *mz_version(void); - -/* mz_deflateInit() initializes a compressor with default options: */ -/* Parameters: */ -/* pStream must point to an initialized mz_stream struct. */ -/* level must be between [MZ_NO_COMPRESSION, MZ_BEST_COMPRESSION]. */ -/* level 1 enables a specially optimized compression function that's been optimized purely for performance, not ratio. - */ -/* (This special func. is currently only enabled when MINIZ_USE_UNALIGNED_LOADS_AND_STORES and MINIZ_LITTLE_ENDIAN are defined.) */ -/* Return values: */ -/* MZ_OK on success. */ -/* MZ_STREAM_ERROR if the stream is bogus. */ -/* MZ_PARAM_ERROR if the input parameters are bogus. */ -/* MZ_MEM_ERROR on out of memory. */ -int mz_deflateInit(mz_streamp pStream, int level); - -/* mz_deflateInit2() is like mz_deflate(), except with more control: */ -/* Additional parameters: */ -/* method must be MZ_DEFLATED */ -/* window_bits must be MZ_DEFAULT_WINDOW_BITS (to wrap the deflate stream with zlib header/adler-32 footer) or -MZ_DEFAULT_WINDOW_BITS (raw deflate/no header or footer) */ -/* mem_level must be between [1, 9] (it's checked but ignored by miniz.c) */ -int mz_deflateInit2(mz_streamp pStream, int level, int method, int window_bits, int mem_level, int strategy); - -/* Quickly resets a compressor without having to reallocate anything. Same as calling mz_deflateEnd() followed by mz_deflateInit()/mz_deflateInit2(). */ -int mz_deflateReset(mz_streamp pStream); - -/* mz_deflate() compresses the input to output, consuming as much of the input and producing as much output as possible. - */ -/* Parameters: */ -/* pStream is the stream to read from and write to. You must initialize/update the next_in, avail_in, next_out, and avail_out members. */ -/* flush may be MZ_NO_FLUSH, MZ_PARTIAL_FLUSH/MZ_SYNC_FLUSH, MZ_FULL_FLUSH, or MZ_FINISH. */ -/* Return values: */ -/* MZ_OK on success (when flushing, or if more input is needed but not available, and/or there's more output to be written but the output buffer is full). */ -/* MZ_STREAM_END if all input has been consumed and all output bytes have been written. Don't call mz_deflate() on the stream anymore. */ -/* MZ_STREAM_ERROR if the stream is bogus. */ -/* MZ_PARAM_ERROR if one of the parameters is invalid. */ -/* MZ_BUF_ERROR if no forward progress is possible because the input and/or output buffers are empty. (Fill up the input buffer or free up some output space and try again.) */ -int mz_deflate(mz_streamp pStream, int flush); - -/* mz_deflateEnd() deinitializes a compressor: */ -/* Return values: */ -/* MZ_OK on success. */ -/* MZ_STREAM_ERROR if the stream is bogus. */ -int mz_deflateEnd(mz_streamp pStream); - -/* mz_deflateBound() returns a (very) conservative upper bound on the amount of data that could be generated by deflate(), assuming flush is set to only MZ_NO_FLUSH or MZ_FINISH. */ -mz_ulong mz_deflateBound(mz_streamp pStream, mz_ulong source_len); - -/* Single-call compression functions mz_compress() and mz_compress2(): */ -/* Returns MZ_OK on success, or one of the error codes from mz_deflate() on failure. */ -int mz_compress(unsigned char *pDest, mz_ulong *pDest_len, const unsigned char *pSource, mz_ulong source_len); -int mz_compress2(unsigned char *pDest, mz_ulong *pDest_len, const unsigned char *pSource, mz_ulong source_len, - int level); - -/* mz_compressBound() returns a (very) conservative upper bound on the amount of data that could be generated by calling mz_compress(). */ -mz_ulong mz_compressBound(mz_ulong source_len); - -/* Initializes a decompressor. */ -int mz_inflateInit(mz_streamp pStream); - -/* mz_inflateInit2() is like mz_inflateInit() with an additional option that controls the window size and whether or not the stream has been wrapped with a zlib header/footer: */ -/* window_bits must be MZ_DEFAULT_WINDOW_BITS (to parse zlib header/footer) or -MZ_DEFAULT_WINDOW_BITS (raw deflate). */ -int mz_inflateInit2(mz_streamp pStream, int window_bits); - -/* Decompresses the input stream to the output, consuming only as much of the input as needed, and writing as much to the output as possible. */ -/* Parameters: */ -/* pStream is the stream to read from and write to. You must initialize/update the next_in, avail_in, next_out, and avail_out members. */ -/* flush may be MZ_NO_FLUSH, MZ_SYNC_FLUSH, or MZ_FINISH. */ -/* On the first call, if flush is MZ_FINISH it's assumed the input and output buffers are both sized large enough to decompress the entire stream in a single call (this is slightly faster). */ -/* MZ_FINISH implies that there are no more source bytes available beside what's already in the input buffer, and that the output buffer is large enough to hold the rest of the decompressed data. */ -/* Return values: */ -/* MZ_OK on success. Either more input is needed but not available, and/or there's more output to be written but the output buffer is full. */ -/* MZ_STREAM_END if all needed input has been consumed and all output bytes have been written. For zlib streams, the adler-32 of the decompressed data has also been verified. */ -/* MZ_STREAM_ERROR if the stream is bogus. */ -/* MZ_DATA_ERROR if the deflate stream is invalid. */ -/* MZ_PARAM_ERROR if one of the parameters is invalid. */ -/* MZ_BUF_ERROR if no forward progress is possible because the input buffer is empty but the inflater needs more input to continue, or if the output buffer is not large enough. Call mz_inflate() again */ -/* with more input data, or with more room in the output buffer (except when using single call decompression, described above). */ -int mz_inflate(mz_streamp pStream, int flush); - -/* Deinitializes a decompressor. */ -int mz_inflateEnd(mz_streamp pStream); - -/* Single-call decompression. */ -/* Returns MZ_OK on success, or one of the error codes from mz_inflate() on failure. */ -int mz_uncompress(unsigned char *pDest, mz_ulong *pDest_len, const unsigned char *pSource, mz_ulong source_len); - -/* Returns a string description of the specified error code, or NULL if the error code is invalid. */ -const char *mz_error(int err); - -/* Redefine zlib-compatible names to miniz equivalents, so miniz.c can be used as a drop-in replacement for the subset of zlib that miniz.c supports. */ -/* Define MINIZ_NO_ZLIB_COMPATIBLE_NAMES to disable zlib-compatibility if you use zlib in the same project. */ -#ifndef MINIZ_NO_ZLIB_COMPATIBLE_NAMES -typedef unsigned char Byte; -typedef unsigned int uInt; -typedef mz_ulong uLong; -typedef Byte Bytef; -typedef uInt uIntf; -typedef char charf; -typedef int intf; -typedef void *voidpf; -typedef uLong uLongf; -typedef void *voidp; -typedef void *const voidpc; -#define Z_NULL 0 -#define Z_NO_FLUSH MZ_NO_FLUSH -#define Z_PARTIAL_FLUSH MZ_PARTIAL_FLUSH -#define Z_SYNC_FLUSH MZ_SYNC_FLUSH -#define Z_FULL_FLUSH MZ_FULL_FLUSH -#define Z_FINISH MZ_FINISH -#define Z_BLOCK MZ_BLOCK -#define Z_OK MZ_OK -#define Z_STREAM_END MZ_STREAM_END -#define Z_NEED_DICT MZ_NEED_DICT -#define Z_ERRNO MZ_ERRNO -#define Z_STREAM_ERROR MZ_STREAM_ERROR -#define Z_DATA_ERROR MZ_DATA_ERROR -#define Z_MEM_ERROR MZ_MEM_ERROR -#define Z_BUF_ERROR MZ_BUF_ERROR -#define Z_VERSION_ERROR MZ_VERSION_ERROR -#define Z_PARAM_ERROR MZ_PARAM_ERROR -#define Z_NO_COMPRESSION MZ_NO_COMPRESSION -#define Z_BEST_SPEED MZ_BEST_SPEED -#define Z_BEST_COMPRESSION MZ_BEST_COMPRESSION -#define Z_DEFAULT_COMPRESSION MZ_DEFAULT_COMPRESSION -#define Z_DEFAULT_STRATEGY MZ_DEFAULT_STRATEGY -#define Z_FILTERED MZ_FILTERED -#define Z_HUFFMAN_ONLY MZ_HUFFMAN_ONLY -#define Z_RLE MZ_RLE -#define Z_FIXED MZ_FIXED -#define Z_DEFLATED MZ_DEFLATED -#define Z_DEFAULT_WINDOW_BITS MZ_DEFAULT_WINDOW_BITS -#define alloc_func mz_alloc_func -#define free_func mz_free_func -#define internal_state mz_internal_state -#define z_stream mz_stream -#define deflateInit mz_deflateInit -#define deflateInit2 mz_deflateInit2 -#define deflateReset mz_deflateReset -#define deflate mz_deflate -#define deflateEnd mz_deflateEnd -#define deflateBound mz_deflateBound -#define compress mz_compress -#define compress2 mz_compress2 -#define compressBound mz_compressBound -#define inflateInit mz_inflateInit -#define inflateInit2 mz_inflateInit2 -#define inflate mz_inflate -#define inflateEnd mz_inflateEnd -#define uncompress mz_uncompress -#define crc32 mz_crc32 -#define adler32 mz_adler32 -#define MAX_WBITS 15 -#define MAX_MEM_LEVEL 9 -#define zError mz_error -#define ZLIB_VERSION MZ_VERSION -#define ZLIB_VERNUM MZ_VERNUM -#define ZLIB_VER_MAJOR MZ_VER_MAJOR -#define ZLIB_VER_MINOR MZ_VER_MINOR -#define ZLIB_VER_REVISION MZ_VER_REVISION -#define ZLIB_VER_SUBREVISION MZ_VER_SUBREVISION -#define zlibVersion mz_version -#define zlib_version mz_version() -#endif /* #ifndef MINIZ_NO_ZLIB_COMPATIBLE_NAMES */ - -#endif /* MINIZ_NO_ZLIB_APIS */ - -} - - -#include -#include -#include -#include - -namespace duckdb_miniz { - -/* ------------------- Types and macros */ -typedef unsigned char mz_uint8; -typedef signed short mz_int16; -typedef unsigned short mz_uint16; -typedef unsigned int mz_uint32; -typedef unsigned int mz_uint; -typedef int64_t mz_int64; -typedef uint64_t mz_uint64; -typedef int mz_bool; - -#define MZ_FALSE (0) -#define MZ_TRUE (1) - -/* Works around MSVC's spammy "warning C4127: conditional expression is constant" message. */ -#ifdef _MSC_VER -#define MZ_MACRO_END while (0, 0) -#else -#define MZ_MACRO_END while (0) -#endif - -#ifdef MINIZ_NO_STDIO -#define MZ_FILE void * -#else -#include -#define MZ_FILE FILE -#endif /* #ifdef MINIZ_NO_STDIO */ - -#ifdef MINIZ_NO_TIME -typedef struct mz_dummy_time_t_tag -{ - int m_dummy; -} mz_dummy_time_t; -#define MZ_TIME_T mz_dummy_time_t -#else -#define MZ_TIME_T time_t -#endif - -#define MZ_ASSERT(x) assert(x) - -#ifdef MINIZ_NO_MALLOC -#define MZ_MALLOC(x) NULL -#define MZ_FREE(x) (void)x, ((void)0) -#define MZ_REALLOC(p, x) NULL -#else -#define MZ_MALLOC(x) malloc(x) -#define MZ_FREE(x) free(x) -#define MZ_REALLOC(p, x) realloc(p, x) -#endif - -#define MZ_MAX(a, b) (((a) > (b)) ? (a) : (b)) -#define MZ_MIN(a, b) (((a) < (b)) ? (a) : (b)) -#define MZ_CLEAR_OBJ(obj) memset(&(obj), 0, sizeof(obj)) - -#if MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN -#define MZ_READ_LE16(p) *((const mz_uint16 *)(p)) -#define MZ_READ_LE32(p) *((const mz_uint32 *)(p)) -#else -#define MZ_READ_LE16(p) ((mz_uint32)(((const mz_uint8 *)(p))[0]) | ((mz_uint32)(((const mz_uint8 *)(p))[1]) << 8U)) -#define MZ_READ_LE32(p) ((mz_uint32)(((const mz_uint8 *)(p))[0]) | ((mz_uint32)(((const mz_uint8 *)(p))[1]) << 8U) | ((mz_uint32)(((const mz_uint8 *)(p))[2]) << 16U) | ((mz_uint32)(((const mz_uint8 *)(p))[3]) << 24U)) -#endif - -#define MZ_READ_LE64(p) (((mz_uint64)MZ_READ_LE32(p)) | (((mz_uint64)MZ_READ_LE32((const mz_uint8 *)(p) + sizeof(mz_uint32))) << 32U)) - -#ifdef _MSC_VER -#define MZ_FORCEINLINE __forceinline -#elif defined(__GNUC__) -#define MZ_FORCEINLINE __inline__ __attribute__((__always_inline__)) -#else -#define MZ_FORCEINLINE inline -#endif - -extern void *miniz_def_alloc_func(void *opaque, size_t items, size_t size); -extern void miniz_def_free_func(void *opaque, void *address); -extern void *miniz_def_realloc_func(void *opaque, void *address, size_t items, size_t size); - -#define MZ_UINT16_MAX (0xFFFFU) -#define MZ_UINT32_MAX (0xFFFFFFFFU) - - - - - -/* ------------------- Low-level Compression API Definitions */ - -/* Set TDEFL_LESS_MEMORY to 1 to use less memory (compression will be slightly slower, and raw/dynamic blocks will be output more frequently). */ -#define TDEFL_LESS_MEMORY 0 - -/* tdefl_init() compression flags logically OR'd together (low 12 bits contain the max. number of probes per dictionary search): */ -/* TDEFL_DEFAULT_MAX_PROBES: The compressor defaults to 128 dictionary probes per dictionary search. 0=Huffman only, 1=Huffman+LZ (fastest/crap compression), 4095=Huffman+LZ (slowest/best compression). */ -enum -{ - TDEFL_HUFFMAN_ONLY = 0, - TDEFL_DEFAULT_MAX_PROBES = 128, - TDEFL_MAX_PROBES_MASK = 0xFFF -}; - -/* TDEFL_WRITE_ZLIB_HEADER: If set, the compressor outputs a zlib header before the deflate data, and the Adler-32 of the source data at the end. Otherwise, you'll get raw deflate data. */ -/* TDEFL_COMPUTE_ADLER32: Always compute the adler-32 of the input data (even when not writing zlib headers). */ -/* TDEFL_GREEDY_PARSING_FLAG: Set to use faster greedy parsing, instead of more efficient lazy parsing. */ -/* TDEFL_NONDETERMINISTIC_PARSING_FLAG: Enable to decrease the compressor's initialization time to the minimum, but the output may vary from run to run given the same input (depending on the contents of memory). */ -/* TDEFL_RLE_MATCHES: Only look for RLE matches (matches with a distance of 1) */ -/* TDEFL_FILTER_MATCHES: Discards matches <= 5 chars if enabled. */ -/* TDEFL_FORCE_ALL_STATIC_BLOCKS: Disable usage of optimized Huffman tables. */ -/* TDEFL_FORCE_ALL_RAW_BLOCKS: Only use raw (uncompressed) deflate blocks. */ -/* The low 12 bits are reserved to control the max # of hash probes per dictionary lookup (see TDEFL_MAX_PROBES_MASK). */ -enum -{ - TDEFL_WRITE_ZLIB_HEADER = 0x01000, - TDEFL_COMPUTE_ADLER32 = 0x02000, - TDEFL_GREEDY_PARSING_FLAG = 0x04000, - TDEFL_NONDETERMINISTIC_PARSING_FLAG = 0x08000, - TDEFL_RLE_MATCHES = 0x10000, - TDEFL_FILTER_MATCHES = 0x20000, - TDEFL_FORCE_ALL_STATIC_BLOCKS = 0x40000, - TDEFL_FORCE_ALL_RAW_BLOCKS = 0x80000 -}; - -/* High level compression functions: */ -/* tdefl_compress_mem_to_heap() compresses a block in memory to a heap block allocated via malloc(). */ -/* On entry: */ -/* pSrc_buf, src_buf_len: Pointer and size of source block to compress. */ -/* flags: The max match finder probes (default is 128) logically OR'd against the above flags. Higher probes are slower but improve compression. */ -/* On return: */ -/* Function returns a pointer to the compressed data, or NULL on failure. */ -/* *pOut_len will be set to the compressed data's size, which could be larger than src_buf_len on uncompressible data. */ -/* The caller must free() the returned block when it's no longer needed. */ -void *tdefl_compress_mem_to_heap(const void *pSrc_buf, size_t src_buf_len, size_t *pOut_len, int flags); - -/* tdefl_compress_mem_to_mem() compresses a block in memory to another block in memory. */ -/* Returns 0 on failure. */ -size_t tdefl_compress_mem_to_mem(void *pOut_buf, size_t out_buf_len, const void *pSrc_buf, size_t src_buf_len, int flags); - -/* Compresses an image to a compressed PNG file in memory. */ -/* On entry: */ -/* pImage, w, h, and num_chans describe the image to compress. num_chans may be 1, 2, 3, or 4. */ -/* The image pitch in bytes per scanline will be w*num_chans. The leftmost pixel on the top scanline is stored first in memory. */ -/* level may range from [0,10], use MZ_NO_COMPRESSION, MZ_BEST_SPEED, MZ_BEST_COMPRESSION, etc. or a decent default is MZ_DEFAULT_LEVEL */ -/* If flip is true, the image will be flipped on the Y axis (useful for OpenGL apps). */ -/* On return: */ -/* Function returns a pointer to the compressed data, or NULL on failure. */ -/* *pLen_out will be set to the size of the PNG image file. */ -/* The caller must mz_free() the returned heap block (which will typically be larger than *pLen_out) when it's no longer needed. */ -void *tdefl_write_image_to_png_file_in_memory_ex(const void *pImage, int w, int h, int num_chans, size_t *pLen_out, mz_uint level, mz_bool flip); -void *tdefl_write_image_to_png_file_in_memory(const void *pImage, int w, int h, int num_chans, size_t *pLen_out); - -/* Output stream interface. The compressor uses this interface to write compressed data. It'll typically be called TDEFL_OUT_BUF_SIZE at a time. */ -typedef mz_bool (*tdefl_put_buf_func_ptr)(const void *pBuf, int len, void *pUser); - -/* tdefl_compress_mem_to_output() compresses a block to an output stream. The above helpers use this function internally. */ -mz_bool tdefl_compress_mem_to_output(const void *pBuf, size_t buf_len, tdefl_put_buf_func_ptr pPut_buf_func, void *pPut_buf_user, int flags); - -enum -{ - TDEFL_MAX_HUFF_TABLES = 3, - TDEFL_MAX_HUFF_SYMBOLS_0 = 288, - TDEFL_MAX_HUFF_SYMBOLS_1 = 32, - TDEFL_MAX_HUFF_SYMBOLS_2 = 19, - TDEFL_LZ_DICT_SIZE = 32768, - TDEFL_LZ_DICT_SIZE_MASK = TDEFL_LZ_DICT_SIZE - 1, - TDEFL_MIN_MATCH_LEN = 3, - TDEFL_MAX_MATCH_LEN = 258 -}; - -/* TDEFL_OUT_BUF_SIZE MUST be large enough to hold a single entire compressed output block (using static/fixed Huffman codes). */ -#if TDEFL_LESS_MEMORY -enum -{ - TDEFL_LZ_CODE_BUF_SIZE = 24 * 1024, - TDEFL_OUT_BUF_SIZE = (TDEFL_LZ_CODE_BUF_SIZE * 13) / 10, - TDEFL_MAX_HUFF_SYMBOLS = 288, - TDEFL_LZ_HASH_BITS = 12, - TDEFL_LEVEL1_HASH_SIZE_MASK = 4095, - TDEFL_LZ_HASH_SHIFT = (TDEFL_LZ_HASH_BITS + 2) / 3, - TDEFL_LZ_HASH_SIZE = 1 << TDEFL_LZ_HASH_BITS -}; -#else -enum -{ - TDEFL_LZ_CODE_BUF_SIZE = 64 * 1024, - TDEFL_OUT_BUF_SIZE = (TDEFL_LZ_CODE_BUF_SIZE * 13) / 10, - TDEFL_MAX_HUFF_SYMBOLS = 288, - TDEFL_LZ_HASH_BITS = 15, - TDEFL_LEVEL1_HASH_SIZE_MASK = 4095, - TDEFL_LZ_HASH_SHIFT = (TDEFL_LZ_HASH_BITS + 2) / 3, - TDEFL_LZ_HASH_SIZE = 1 << TDEFL_LZ_HASH_BITS -}; -#endif - -/* The low-level tdefl functions below may be used directly if the above helper functions aren't flexible enough. The low-level functions don't make any heap allocations, unlike the above helper functions. */ -typedef enum { - TDEFL_STATUS_BAD_PARAM = -2, - TDEFL_STATUS_PUT_BUF_FAILED = -1, - TDEFL_STATUS_OKAY = 0, - TDEFL_STATUS_DONE = 1 -} tdefl_status; - -/* Must map to MZ_NO_FLUSH, MZ_SYNC_FLUSH, etc. enums */ -typedef enum { - TDEFL_NO_FLUSH = 0, - TDEFL_SYNC_FLUSH = 2, - TDEFL_FULL_FLUSH = 3, - TDEFL_FINISH = 4 -} tdefl_flush; - -/* tdefl's compression state structure. */ -typedef struct -{ - tdefl_put_buf_func_ptr m_pPut_buf_func; - void *m_pPut_buf_user; - mz_uint m_flags, m_max_probes[2]; - int m_greedy_parsing; - mz_uint m_adler32, m_lookahead_pos, m_lookahead_size, m_dict_size; - mz_uint8 *m_pLZ_code_buf, *m_pLZ_flags, *m_pOutput_buf, *m_pOutput_buf_end; - mz_uint m_num_flags_left, m_total_lz_bytes, m_lz_code_buf_dict_pos, m_bits_in, m_bit_buffer; - mz_uint m_saved_match_dist, m_saved_match_len, m_saved_lit, m_output_flush_ofs, m_output_flush_remaining, m_finished, m_block_index, m_wants_to_finish; - tdefl_status m_prev_return_status; - const void *m_pIn_buf; - void *m_pOut_buf; - size_t *m_pIn_buf_size, *m_pOut_buf_size; - tdefl_flush m_flush; - const mz_uint8 *m_pSrc; - size_t m_src_buf_left, m_out_buf_ofs; - mz_uint8 m_dict[TDEFL_LZ_DICT_SIZE + TDEFL_MAX_MATCH_LEN - 1]; - mz_uint16 m_huff_count[TDEFL_MAX_HUFF_TABLES][TDEFL_MAX_HUFF_SYMBOLS]; - mz_uint16 m_huff_codes[TDEFL_MAX_HUFF_TABLES][TDEFL_MAX_HUFF_SYMBOLS]; - mz_uint8 m_huff_code_sizes[TDEFL_MAX_HUFF_TABLES][TDEFL_MAX_HUFF_SYMBOLS]; - mz_uint8 m_lz_code_buf[TDEFL_LZ_CODE_BUF_SIZE]; - mz_uint16 m_next[TDEFL_LZ_DICT_SIZE]; - mz_uint16 m_hash[TDEFL_LZ_HASH_SIZE]; - mz_uint8 m_output_buf[TDEFL_OUT_BUF_SIZE]; -} tdefl_compressor; - -/* Initializes the compressor. */ -/* There is no corresponding deinit() function because the tdefl API's do not dynamically allocate memory. */ -/* pBut_buf_func: If NULL, output data will be supplied to the specified callback. In this case, the user should call the tdefl_compress_buffer() API for compression. */ -/* If pBut_buf_func is NULL the user should always call the tdefl_compress() API. */ -/* flags: See the above enums (TDEFL_HUFFMAN_ONLY, TDEFL_WRITE_ZLIB_HEADER, etc.) */ -tdefl_status tdefl_init(tdefl_compressor *d, tdefl_put_buf_func_ptr pPut_buf_func, void *pPut_buf_user, int flags); - -/* Compresses a block of data, consuming as much of the specified input buffer as possible, and writing as much compressed data to the specified output buffer as possible. */ -tdefl_status tdefl_compress(tdefl_compressor *d, const void *pIn_buf, size_t *pIn_buf_size, void *pOut_buf, size_t *pOut_buf_size, tdefl_flush flush); - -/* tdefl_compress_buffer() is only usable when the tdefl_init() is called with a non-NULL tdefl_put_buf_func_ptr. */ -/* tdefl_compress_buffer() always consumes the entire input buffer. */ -tdefl_status tdefl_compress_buffer(tdefl_compressor *d, const void *pIn_buf, size_t in_buf_size, tdefl_flush flush); - -tdefl_status tdefl_get_prev_return_status(tdefl_compressor *d); -mz_uint32 tdefl_get_adler32(tdefl_compressor *d); - -/* Create tdefl_compress() flags given zlib-style compression parameters. */ -/* level may range from [0,10] (where 10 is absolute max compression, but may be much slower on some files) */ -/* window_bits may be -15 (raw deflate) or 15 (zlib) */ -/* strategy may be either MZ_DEFAULT_STRATEGY, MZ_FILTERED, MZ_HUFFMAN_ONLY, MZ_RLE, or MZ_FIXED */ -mz_uint tdefl_create_comp_flags_from_zip_params(int level, int window_bits, int strategy); - -/* Allocate the tdefl_compressor structure in C so that */ -/* non-C language bindings to tdefl_ API don't need to worry about */ -/* structure size and allocation mechanism. */ -tdefl_compressor *tdefl_compressor_alloc(); -void tdefl_compressor_free(tdefl_compressor *pComp); - - - - -/* ------------------- Low-level Decompression API Definitions */ - - -/* Decompression flags used by tinfl_decompress(). */ -/* TINFL_FLAG_PARSE_ZLIB_HEADER: If set, the input has a valid zlib header and ends with an adler32 checksum (it's a valid zlib stream). Otherwise, the input is a raw deflate stream. */ -/* TINFL_FLAG_HAS_MORE_INPUT: If set, there are more input bytes available beyond the end of the supplied input buffer. If clear, the input buffer contains all remaining input. */ -/* TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF: If set, the output buffer is large enough to hold the entire decompressed stream. If clear, the output buffer is at least the size of the dictionary (typically 32KB). */ -/* TINFL_FLAG_COMPUTE_ADLER32: Force adler-32 checksum computation of the decompressed bytes. */ -enum -{ - TINFL_FLAG_PARSE_ZLIB_HEADER = 1, - TINFL_FLAG_HAS_MORE_INPUT = 2, - TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF = 4, - TINFL_FLAG_COMPUTE_ADLER32 = 8 -}; - -/* High level decompression functions: */ -/* tinfl_decompress_mem_to_heap() decompresses a block in memory to a heap block allocated via malloc(). */ -/* On entry: */ -/* pSrc_buf, src_buf_len: Pointer and size of the Deflate or zlib source data to decompress. */ -/* On return: */ -/* Function returns a pointer to the decompressed data, or NULL on failure. */ -/* *pOut_len will be set to the decompressed data's size, which could be larger than src_buf_len on uncompressible data. */ -/* The caller must call mz_free() on the returned block when it's no longer needed. */ -void *tinfl_decompress_mem_to_heap(const void *pSrc_buf, size_t src_buf_len, size_t *pOut_len, int flags); - -/* tinfl_decompress_mem_to_mem() decompresses a block in memory to another block in memory. */ -/* Returns TINFL_DECOMPRESS_MEM_TO_MEM_FAILED on failure, or the number of bytes written on success. */ -#define TINFL_DECOMPRESS_MEM_TO_MEM_FAILED ((size_t)(-1)) -size_t tinfl_decompress_mem_to_mem(void *pOut_buf, size_t out_buf_len, const void *pSrc_buf, size_t src_buf_len, int flags); - -/* tinfl_decompress_mem_to_callback() decompresses a block in memory to an internal 32KB buffer, and a user provided callback function will be called to flush the buffer. */ -/* Returns 1 on success or 0 on failure. */ -typedef int (*tinfl_put_buf_func_ptr)(const void *pBuf, int len, void *pUser); -int tinfl_decompress_mem_to_callback(const void *pIn_buf, size_t *pIn_buf_size, tinfl_put_buf_func_ptr pPut_buf_func, void *pPut_buf_user, int flags); - -struct tinfl_decompressor_tag; -typedef struct tinfl_decompressor_tag tinfl_decompressor; - -/* Allocate the tinfl_decompressor structure in C so that */ -/* non-C language bindings to tinfl_ API don't need to worry about */ -/* structure size and allocation mechanism. */ - -tinfl_decompressor *tinfl_decompressor_alloc(); -void tinfl_decompressor_free(tinfl_decompressor *pDecomp); - -/* Max size of LZ dictionary. */ -#define TINFL_LZ_DICT_SIZE 32768 - -/* Return status. */ -typedef enum { - /* This flags indicates the inflator needs 1 or more input bytes to make forward progress, but the caller is indicating that no more are available. The compressed data */ - /* is probably corrupted. If you call the inflator again with more bytes it'll try to continue processing the input but this is a BAD sign (either the data is corrupted or you called it incorrectly). */ - /* If you call it again with no input you'll just get TINFL_STATUS_FAILED_CANNOT_MAKE_PROGRESS again. */ - TINFL_STATUS_FAILED_CANNOT_MAKE_PROGRESS = -4, - - /* This flag indicates that one or more of the input parameters was obviously bogus. (You can try calling it again, but if you get this error the calling code is wrong.) */ - TINFL_STATUS_BAD_PARAM = -3, - - /* This flags indicate the inflator is finished but the adler32 check of the uncompressed data didn't match. If you call it again it'll return TINFL_STATUS_DONE. */ - TINFL_STATUS_ADLER32_MISMATCH = -2, - - /* This flags indicate the inflator has somehow failed (bad code, corrupted input, etc.). If you call it again without resetting via tinfl_init() it it'll just keep on returning the same status failure code. */ - TINFL_STATUS_FAILED = -1, - - /* Any status code less than TINFL_STATUS_DONE must indicate a failure. */ - - /* This flag indicates the inflator has returned every byte of uncompressed data that it can, has consumed every byte that it needed, has successfully reached the end of the deflate stream, and */ - /* if zlib headers and adler32 checking enabled that it has successfully checked the uncompressed data's adler32. If you call it again you'll just get TINFL_STATUS_DONE over and over again. */ - TINFL_STATUS_DONE = 0, - - /* This flag indicates the inflator MUST have more input data (even 1 byte) before it can make any more forward progress, or you need to clear the TINFL_FLAG_HAS_MORE_INPUT */ - /* flag on the next call if you don't have any more source data. If the source data was somehow corrupted it's also possible (but unlikely) for the inflator to keep on demanding input to */ - /* proceed, so be sure to properly set the TINFL_FLAG_HAS_MORE_INPUT flag. */ - TINFL_STATUS_NEEDS_MORE_INPUT = 1, - - /* This flag indicates the inflator definitely has 1 or more bytes of uncompressed data available, but it cannot write this data into the output buffer. */ - /* Note if the source compressed data was corrupted it's possible for the inflator to return a lot of uncompressed data to the caller. I've been assuming you know how much uncompressed data to expect */ - /* (either exact or worst case) and will stop calling the inflator and fail after receiving too much. In pure streaming scenarios where you have no idea how many bytes to expect this may not be possible */ - /* so I may need to add some code to address this. */ - TINFL_STATUS_HAS_MORE_OUTPUT = 2 -} tinfl_status; - -/* Initializes the decompressor to its initial state. */ -#define tinfl_init(r) \ - do \ - { \ - (r)->m_state = 0; \ - } \ - MZ_MACRO_END -#define tinfl_get_adler32(r) (r)->m_check_adler32 - -/* Main low-level decompressor coroutine function. This is the only function actually needed for decompression. All the other functions are just high-level helpers for improved usability. */ -/* This is a universal API, i.e. it can be used as a building block to build any desired higher level decompression API. In the limit case, it can be called once per every byte input or output. */ -tinfl_status tinfl_decompress(tinfl_decompressor *r, const mz_uint8 *pIn_buf_next, size_t *pIn_buf_size, mz_uint8 *pOut_buf_start, mz_uint8 *pOut_buf_next, size_t *pOut_buf_size, const mz_uint32 decomp_flags); - -/* Internal/private bits follow. */ -enum -{ - TINFL_MAX_HUFF_TABLES = 3, - TINFL_MAX_HUFF_SYMBOLS_0 = 288, - TINFL_MAX_HUFF_SYMBOLS_1 = 32, - TINFL_MAX_HUFF_SYMBOLS_2 = 19, - TINFL_FAST_LOOKUP_BITS = 10, - TINFL_FAST_LOOKUP_SIZE = 1 << TINFL_FAST_LOOKUP_BITS -}; - -typedef struct -{ - mz_uint8 m_code_size[TINFL_MAX_HUFF_SYMBOLS_0]; - mz_int16 m_look_up[TINFL_FAST_LOOKUP_SIZE], m_tree[TINFL_MAX_HUFF_SYMBOLS_0 * 2]; -} tinfl_huff_table; - -#if MINIZ_HAS_64BIT_REGISTERS -#define TINFL_USE_64BIT_BITBUF 1 -#else -#define TINFL_USE_64BIT_BITBUF 0 -#endif - -#if TINFL_USE_64BIT_BITBUF -typedef mz_uint64 tinfl_bit_buf_t; -#define TINFL_BITBUF_SIZE (64) -#else -typedef mz_uint32 tinfl_bit_buf_t; -#define TINFL_BITBUF_SIZE (32) -#endif - -struct tinfl_decompressor_tag -{ - mz_uint32 m_state, m_num_bits, m_zhdr0, m_zhdr1, m_z_adler32, m_final, m_type, m_check_adler32, m_dist, m_counter, m_num_extra, m_table_sizes[TINFL_MAX_HUFF_TABLES]; - tinfl_bit_buf_t m_bit_buf; - size_t m_dist_from_out_buf_start; - tinfl_huff_table m_tables[TINFL_MAX_HUFF_TABLES]; - mz_uint8 m_raw_header[4], m_len_codes[TINFL_MAX_HUFF_SYMBOLS_0 + TINFL_MAX_HUFF_SYMBOLS_1 + 137]; -}; - - - - - - -/* ------------------- ZIP archive reading/writing */ - -#ifndef MINIZ_NO_ARCHIVE_APIS - - -enum -{ - /* Note: These enums can be reduced as needed to save memory or stack space - they are pretty conservative. */ - MZ_ZIP_MAX_IO_BUF_SIZE = 64 * 1024, - MZ_ZIP_MAX_ARCHIVE_FILENAME_SIZE = 512, - MZ_ZIP_MAX_ARCHIVE_FILE_COMMENT_SIZE = 512 -}; - -typedef struct -{ - /* Central directory file index. */ - mz_uint32 m_file_index; - - /* Byte offset of this entry in the archive's central directory. Note we currently only support up to UINT_MAX or less bytes in the central dir. */ - mz_uint64 m_central_dir_ofs; - - /* These fields are copied directly from the zip's central dir. */ - mz_uint16 m_version_made_by; - mz_uint16 m_version_needed; - mz_uint16 m_bit_flag; - mz_uint16 m_method; - -#ifndef MINIZ_NO_TIME - MZ_TIME_T m_time; -#endif - - /* CRC-32 of uncompressed data. */ - mz_uint32 m_crc32; - - /* File's compressed size. */ - mz_uint64 m_comp_size; - - /* File's uncompressed size. Note, I've seen some old archives where directory entries had 512 bytes for their uncompressed sizes, but when you try to unpack them you actually get 0 bytes. */ - mz_uint64 m_uncomp_size; - - /* Zip internal and external file attributes. */ - mz_uint16 m_internal_attr; - mz_uint32 m_external_attr; - - /* Entry's local header file offset in bytes. */ - mz_uint64 m_local_header_ofs; - - /* Size of comment in bytes. */ - mz_uint32 m_comment_size; - - /* MZ_TRUE if the entry appears to be a directory. */ - mz_bool m_is_directory; - - /* MZ_TRUE if the entry uses encryption/strong encryption (which miniz_zip doesn't support) */ - mz_bool m_is_encrypted; - - /* MZ_TRUE if the file is not encrypted, a patch file, and if it uses a compression method we support. */ - mz_bool m_is_supported; - - /* Filename. If string ends in '/' it's a subdirectory entry. */ - /* Guaranteed to be zero terminated, may be truncated to fit. */ - char m_filename[MZ_ZIP_MAX_ARCHIVE_FILENAME_SIZE]; - - /* Comment field. */ - /* Guaranteed to be zero terminated, may be truncated to fit. */ - char m_comment[MZ_ZIP_MAX_ARCHIVE_FILE_COMMENT_SIZE]; - -} mz_zip_archive_file_stat; - -typedef size_t (*mz_file_read_func)(void *pOpaque, mz_uint64 file_ofs, void *pBuf, size_t n); -typedef size_t (*mz_file_write_func)(void *pOpaque, mz_uint64 file_ofs, const void *pBuf, size_t n); -typedef mz_bool (*mz_file_needs_keepalive)(void *pOpaque); - -struct mz_zip_internal_state_tag; -typedef struct mz_zip_internal_state_tag mz_zip_internal_state; - -typedef enum { - MZ_ZIP_MODE_INVALID = 0, - MZ_ZIP_MODE_READING = 1, - MZ_ZIP_MODE_WRITING = 2, - MZ_ZIP_MODE_WRITING_HAS_BEEN_FINALIZED = 3 -} mz_zip_mode; - -typedef enum { - MZ_ZIP_FLAG_CASE_SENSITIVE = 0x0100, - MZ_ZIP_FLAG_IGNORE_PATH = 0x0200, - MZ_ZIP_FLAG_COMPRESSED_DATA = 0x0400, - MZ_ZIP_FLAG_DO_NOT_SORT_CENTRAL_DIRECTORY = 0x0800, - MZ_ZIP_FLAG_VALIDATE_LOCATE_FILE_FLAG = 0x1000, /* if enabled, mz_zip_reader_locate_file() will be called on each file as its validated to ensure the func finds the file in the central dir (intended for testing) */ - MZ_ZIP_FLAG_VALIDATE_HEADERS_ONLY = 0x2000, /* validate the local headers, but don't decompress the entire file and check the crc32 */ - MZ_ZIP_FLAG_WRITE_ZIP64 = 0x4000, /* always use the zip64 file format, instead of the original zip file format with automatic switch to zip64. Use as flags parameter with mz_zip_writer_init*_v2 */ - MZ_ZIP_FLAG_WRITE_ALLOW_READING = 0x8000, - MZ_ZIP_FLAG_ASCII_FILENAME = 0x10000 -} mz_zip_flags; - -typedef enum { - MZ_ZIP_TYPE_INVALID = 0, - MZ_ZIP_TYPE_USER, - MZ_ZIP_TYPE_MEMORY, - MZ_ZIP_TYPE_HEAP, - MZ_ZIP_TYPE_FILE, - MZ_ZIP_TYPE_CFILE, - MZ_ZIP_TOTAL_TYPES -} mz_zip_type; - -/* miniz error codes. Be sure to update mz_zip_get_error_string() if you add or modify this enum. */ -typedef enum { - MZ_ZIP_NO_ERROR = 0, - MZ_ZIP_UNDEFINED_ERROR, - MZ_ZIP_TOO_MANY_FILES, - MZ_ZIP_FILE_TOO_LARGE, - MZ_ZIP_UNSUPPORTED_METHOD, - MZ_ZIP_UNSUPPORTED_ENCRYPTION, - MZ_ZIP_UNSUPPORTED_FEATURE, - MZ_ZIP_FAILED_FINDING_CENTRAL_DIR, - MZ_ZIP_NOT_AN_ARCHIVE, - MZ_ZIP_INVALID_HEADER_OR_CORRUPTED, - MZ_ZIP_UNSUPPORTED_MULTIDISK, - MZ_ZIP_DECOMPRESSION_FAILED, - MZ_ZIP_COMPRESSION_FAILED, - MZ_ZIP_UNEXPECTED_DECOMPRESSED_SIZE, - MZ_ZIP_CRC_CHECK_FAILED, - MZ_ZIP_UNSUPPORTED_CDIR_SIZE, - MZ_ZIP_ALLOC_FAILED, - MZ_ZIP_FILE_OPEN_FAILED, - MZ_ZIP_FILE_CREATE_FAILED, - MZ_ZIP_FILE_WRITE_FAILED, - MZ_ZIP_FILE_READ_FAILED, - MZ_ZIP_FILE_CLOSE_FAILED, - MZ_ZIP_FILE_SEEK_FAILED, - MZ_ZIP_FILE_STAT_FAILED, - MZ_ZIP_INVALID_PARAMETER, - MZ_ZIP_INVALID_FILENAME, - MZ_ZIP_BUF_TOO_SMALL, - MZ_ZIP_INTERNAL_ERROR, - MZ_ZIP_FILE_NOT_FOUND, - MZ_ZIP_ARCHIVE_TOO_LARGE, - MZ_ZIP_VALIDATION_FAILED, - MZ_ZIP_WRITE_CALLBACK_FAILED, - MZ_ZIP_TOTAL_ERRORS -} mz_zip_error; - -typedef struct -{ - mz_uint64 m_archive_size; - mz_uint64 m_central_directory_file_ofs; - - /* We only support up to UINT32_MAX files in zip64 mode. */ - mz_uint32 m_total_files; - mz_zip_mode m_zip_mode; - mz_zip_type m_zip_type; - mz_zip_error m_last_error; - - mz_uint64 m_file_offset_alignment; - - mz_alloc_func m_pAlloc; - mz_free_func m_pFree; - mz_realloc_func m_pRealloc; - void *m_pAlloc_opaque; - - mz_file_read_func m_pRead; - mz_file_write_func m_pWrite; - mz_file_needs_keepalive m_pNeeds_keepalive; - void *m_pIO_opaque; - - mz_zip_internal_state *m_pState; - -} mz_zip_archive; - -typedef struct -{ - mz_zip_archive *pZip; - mz_uint flags; - - int status; -#ifndef MINIZ_DISABLE_ZIP_READER_CRC32_CHECKS - mz_uint file_crc32; -#endif - mz_uint64 read_buf_size, read_buf_ofs, read_buf_avail, comp_remaining, out_buf_ofs, cur_file_ofs; - mz_zip_archive_file_stat file_stat; - void *pRead_buf; - void *pWrite_buf; - - size_t out_blk_remain; - - tinfl_decompressor inflator; - -} mz_zip_reader_extract_iter_state; - -/* -------- ZIP reading */ - -/* Inits a ZIP archive reader. */ -/* These functions read and validate the archive's central directory. */ -mz_bool mz_zip_reader_init(mz_zip_archive *pZip, mz_uint64 size, mz_uint flags); - -mz_bool mz_zip_reader_init_mem(mz_zip_archive *pZip, const void *pMem, size_t size, mz_uint flags); - -#ifndef MINIZ_NO_STDIO -/* Read a archive from a disk file. */ -/* file_start_ofs is the file offset where the archive actually begins, or 0. */ -/* actual_archive_size is the true total size of the archive, which may be smaller than the file's actual size on disk. If zero the entire file is treated as the archive. */ -mz_bool mz_zip_reader_init_file(mz_zip_archive *pZip, const char *pFilename, mz_uint32 flags); -mz_bool mz_zip_reader_init_file_v2(mz_zip_archive *pZip, const char *pFilename, mz_uint flags, mz_uint64 file_start_ofs, mz_uint64 archive_size); - -/* Read an archive from an already opened FILE, beginning at the current file position. */ -/* The archive is assumed to be archive_size bytes long. If archive_size is < 0, then the entire rest of the file is assumed to contain the archive. */ -/* The FILE will NOT be closed when mz_zip_reader_end() is called. */ -mz_bool mz_zip_reader_init_cfile(mz_zip_archive *pZip, MZ_FILE *pFile, mz_uint64 archive_size, mz_uint flags); -#endif - -/* Ends archive reading, freeing all allocations, and closing the input archive file if mz_zip_reader_init_file() was used. */ -mz_bool mz_zip_reader_end(mz_zip_archive *pZip); - -/* -------- ZIP reading or writing */ - -/* Clears a mz_zip_archive struct to all zeros. */ -/* Important: This must be done before passing the struct to any mz_zip functions. */ -void mz_zip_zero_struct(mz_zip_archive *pZip); - -mz_zip_mode mz_zip_get_mode(mz_zip_archive *pZip); -mz_zip_type mz_zip_get_type(mz_zip_archive *pZip); - -/* Returns the total number of files in the archive. */ -mz_uint mz_zip_reader_get_num_files(mz_zip_archive *pZip); - -mz_uint64 mz_zip_get_archive_size(mz_zip_archive *pZip); -mz_uint64 mz_zip_get_archive_file_start_offset(mz_zip_archive *pZip); -MZ_FILE *mz_zip_get_cfile(mz_zip_archive *pZip); - -/* Reads n bytes of raw archive data, starting at file offset file_ofs, to pBuf. */ -size_t mz_zip_read_archive_data(mz_zip_archive *pZip, mz_uint64 file_ofs, void *pBuf, size_t n); - -/* All mz_zip funcs set the m_last_error field in the mz_zip_archive struct. These functions retrieve/manipulate this field. */ -/* Note that the m_last_error functionality is not thread safe. */ -mz_zip_error mz_zip_set_last_error(mz_zip_archive *pZip, mz_zip_error err_num); -mz_zip_error mz_zip_peek_last_error(mz_zip_archive *pZip); -mz_zip_error mz_zip_clear_last_error(mz_zip_archive *pZip); -mz_zip_error mz_zip_get_last_error(mz_zip_archive *pZip); -const char *mz_zip_get_error_string(mz_zip_error mz_err); - -/* MZ_TRUE if the archive file entry is a directory entry. */ -mz_bool mz_zip_reader_is_file_a_directory(mz_zip_archive *pZip, mz_uint file_index); - -/* MZ_TRUE if the file is encrypted/strong encrypted. */ -mz_bool mz_zip_reader_is_file_encrypted(mz_zip_archive *pZip, mz_uint file_index); - -/* MZ_TRUE if the compression method is supported, and the file is not encrypted, and the file is not a compressed patch file. */ -mz_bool mz_zip_reader_is_file_supported(mz_zip_archive *pZip, mz_uint file_index); - -/* Retrieves the filename of an archive file entry. */ -/* Returns the number of bytes written to pFilename, or if filename_buf_size is 0 this function returns the number of bytes needed to fully store the filename. */ -mz_uint mz_zip_reader_get_filename(mz_zip_archive *pZip, mz_uint file_index, char *pFilename, mz_uint filename_buf_size); - -/* Attempts to locates a file in the archive's central directory. */ -/* Valid flags: MZ_ZIP_FLAG_CASE_SENSITIVE, MZ_ZIP_FLAG_IGNORE_PATH */ -/* Returns -1 if the file cannot be found. */ -int mz_zip_reader_locate_file(mz_zip_archive *pZip, const char *pName, const char *pComment, mz_uint flags); -int mz_zip_reader_locate_file_v2(mz_zip_archive *pZip, const char *pName, const char *pComment, mz_uint flags, mz_uint32 *file_index); - -/* Returns detailed information about an archive file entry. */ -mz_bool mz_zip_reader_file_stat(mz_zip_archive *pZip, mz_uint file_index, mz_zip_archive_file_stat *pStat); - -/* MZ_TRUE if the file is in zip64 format. */ -/* A file is considered zip64 if it contained a zip64 end of central directory marker, or if it contained any zip64 extended file information fields in the central directory. */ -mz_bool mz_zip_is_zip64(mz_zip_archive *pZip); - -/* Returns the total central directory size in bytes. */ -/* The current max supported size is <= MZ_UINT32_MAX. */ -size_t mz_zip_get_central_dir_size(mz_zip_archive *pZip); - -/* Extracts a archive file to a memory buffer using no memory allocation. */ -/* There must be at least enough room on the stack to store the inflator's state (~34KB or so). */ -mz_bool mz_zip_reader_extract_to_mem_no_alloc(mz_zip_archive *pZip, mz_uint file_index, void *pBuf, size_t buf_size, mz_uint flags, void *pUser_read_buf, size_t user_read_buf_size); -mz_bool mz_zip_reader_extract_file_to_mem_no_alloc(mz_zip_archive *pZip, const char *pFilename, void *pBuf, size_t buf_size, mz_uint flags, void *pUser_read_buf, size_t user_read_buf_size); - -/* Extracts a archive file to a memory buffer. */ -mz_bool mz_zip_reader_extract_to_mem(mz_zip_archive *pZip, mz_uint file_index, void *pBuf, size_t buf_size, mz_uint flags); -mz_bool mz_zip_reader_extract_file_to_mem(mz_zip_archive *pZip, const char *pFilename, void *pBuf, size_t buf_size, mz_uint flags); - -/* Extracts a archive file to a dynamically allocated heap buffer. */ -/* The memory will be allocated via the mz_zip_archive's alloc/realloc functions. */ -/* Returns NULL and sets the last error on failure. */ -void *mz_zip_reader_extract_to_heap(mz_zip_archive *pZip, mz_uint file_index, size_t *pSize, mz_uint flags); -void *mz_zip_reader_extract_file_to_heap(mz_zip_archive *pZip, const char *pFilename, size_t *pSize, mz_uint flags); - -/* Extracts a archive file using a callback function to output the file's data. */ -mz_bool mz_zip_reader_extract_to_callback(mz_zip_archive *pZip, mz_uint file_index, mz_file_write_func pCallback, void *pOpaque, mz_uint flags); -mz_bool mz_zip_reader_extract_file_to_callback(mz_zip_archive *pZip, const char *pFilename, mz_file_write_func pCallback, void *pOpaque, mz_uint flags); - -/* Extract a file iteratively */ -mz_zip_reader_extract_iter_state* mz_zip_reader_extract_iter_new(mz_zip_archive *pZip, mz_uint file_index, mz_uint flags); -mz_zip_reader_extract_iter_state* mz_zip_reader_extract_file_iter_new(mz_zip_archive *pZip, const char *pFilename, mz_uint flags); -size_t mz_zip_reader_extract_iter_read(mz_zip_reader_extract_iter_state* pState, void* pvBuf, size_t buf_size); -mz_bool mz_zip_reader_extract_iter_free(mz_zip_reader_extract_iter_state* pState); - -#ifndef MINIZ_NO_STDIO -/* Extracts a archive file to a disk file and sets its last accessed and modified times. */ -/* This function only extracts files, not archive directory records. */ -mz_bool mz_zip_reader_extract_to_file(mz_zip_archive *pZip, mz_uint file_index, const char *pDst_filename, mz_uint flags); -mz_bool mz_zip_reader_extract_file_to_file(mz_zip_archive *pZip, const char *pArchive_filename, const char *pDst_filename, mz_uint flags); - -/* Extracts a archive file starting at the current position in the destination FILE stream. */ -mz_bool mz_zip_reader_extract_to_cfile(mz_zip_archive *pZip, mz_uint file_index, MZ_FILE *File, mz_uint flags); -mz_bool mz_zip_reader_extract_file_to_cfile(mz_zip_archive *pZip, const char *pArchive_filename, MZ_FILE *pFile, mz_uint flags); -#endif - -#if 0 -/* TODO */ - typedef void *mz_zip_streaming_extract_state_ptr; - mz_zip_streaming_extract_state_ptr mz_zip_streaming_extract_begin(mz_zip_archive *pZip, mz_uint file_index, mz_uint flags); - uint64_t mz_zip_streaming_extract_get_size(mz_zip_archive *pZip, mz_zip_streaming_extract_state_ptr pState); - uint64_t mz_zip_streaming_extract_get_cur_ofs(mz_zip_archive *pZip, mz_zip_streaming_extract_state_ptr pState); - mz_bool mz_zip_streaming_extract_seek(mz_zip_archive *pZip, mz_zip_streaming_extract_state_ptr pState, uint64_t new_ofs); - size_t mz_zip_streaming_extract_read(mz_zip_archive *pZip, mz_zip_streaming_extract_state_ptr pState, void *pBuf, size_t buf_size); - mz_bool mz_zip_streaming_extract_end(mz_zip_archive *pZip, mz_zip_streaming_extract_state_ptr pState); -#endif - -/* This function compares the archive's local headers, the optional local zip64 extended information block, and the optional descriptor following the compressed data vs. the data in the central directory. */ -/* It also validates that each file can be successfully uncompressed unless the MZ_ZIP_FLAG_VALIDATE_HEADERS_ONLY is specified. */ -mz_bool mz_zip_validate_file(mz_zip_archive *pZip, mz_uint file_index, mz_uint flags); - -/* Validates an entire archive by calling mz_zip_validate_file() on each file. */ -mz_bool mz_zip_validate_archive(mz_zip_archive *pZip, mz_uint flags); - -/* Misc utils/helpers, valid for ZIP reading or writing */ -mz_bool mz_zip_validate_mem_archive(const void *pMem, size_t size, mz_uint flags, mz_zip_error *pErr); -mz_bool mz_zip_validate_file_archive(const char *pFilename, mz_uint flags, mz_zip_error *pErr); - -/* Universal end function - calls either mz_zip_reader_end() or mz_zip_writer_end(). */ -mz_bool mz_zip_end(mz_zip_archive *pZip); - -/* -------- ZIP writing */ - -#ifndef MINIZ_NO_ARCHIVE_WRITING_APIS - -/* Inits a ZIP archive writer. */ -/*Set pZip->m_pWrite (and pZip->m_pIO_opaque) before calling mz_zip_writer_init or mz_zip_writer_init_v2*/ -/*The output is streamable, i.e. file_ofs in mz_file_write_func always increases only by n*/ -mz_bool mz_zip_writer_init(mz_zip_archive *pZip, mz_uint64 existing_size); -mz_bool mz_zip_writer_init_v2(mz_zip_archive *pZip, mz_uint64 existing_size, mz_uint flags); - -mz_bool mz_zip_writer_init_heap(mz_zip_archive *pZip, size_t size_to_reserve_at_beginning, size_t initial_allocation_size); -mz_bool mz_zip_writer_init_heap_v2(mz_zip_archive *pZip, size_t size_to_reserve_at_beginning, size_t initial_allocation_size, mz_uint flags); - -#ifndef MINIZ_NO_STDIO -mz_bool mz_zip_writer_init_file(mz_zip_archive *pZip, const char *pFilename, mz_uint64 size_to_reserve_at_beginning); -mz_bool mz_zip_writer_init_file_v2(mz_zip_archive *pZip, const char *pFilename, mz_uint64 size_to_reserve_at_beginning, mz_uint flags); -mz_bool mz_zip_writer_init_cfile(mz_zip_archive *pZip, MZ_FILE *pFile, mz_uint flags); -#endif - -/* Converts a ZIP archive reader object into a writer object, to allow efficient in-place file appends to occur on an existing archive. */ -/* For archives opened using mz_zip_reader_init_file, pFilename must be the archive's filename so it can be reopened for writing. If the file can't be reopened, mz_zip_reader_end() will be called. */ -/* For archives opened using mz_zip_reader_init_mem, the memory block must be growable using the realloc callback (which defaults to realloc unless you've overridden it). */ -/* Finally, for archives opened using mz_zip_reader_init, the mz_zip_archive's user provided m_pWrite function cannot be NULL. */ -/* Note: In-place archive modification is not recommended unless you know what you're doing, because if execution stops or something goes wrong before */ -/* the archive is finalized the file's central directory will be hosed. */ -mz_bool mz_zip_writer_init_from_reader(mz_zip_archive *pZip, const char *pFilename); -mz_bool mz_zip_writer_init_from_reader_v2(mz_zip_archive *pZip, const char *pFilename, mz_uint flags); - -/* Adds the contents of a memory buffer to an archive. These functions record the current local time into the archive. */ -/* To add a directory entry, call this method with an archive name ending in a forwardslash with an empty buffer. */ -/* level_and_flags - compression level (0-10, see MZ_BEST_SPEED, MZ_BEST_COMPRESSION, etc.) logically OR'd with zero or more mz_zip_flags, or just set to MZ_DEFAULT_COMPRESSION. */ -mz_bool mz_zip_writer_add_mem(mz_zip_archive *pZip, const char *pArchive_name, const void *pBuf, size_t buf_size, mz_uint level_and_flags); - -/* Like mz_zip_writer_add_mem(), except you can specify a file comment field, and optionally supply the function with already compressed data. */ -/* uncomp_size/uncomp_crc32 are only used if the MZ_ZIP_FLAG_COMPRESSED_DATA flag is specified. */ -mz_bool mz_zip_writer_add_mem_ex(mz_zip_archive *pZip, const char *pArchive_name, const void *pBuf, size_t buf_size, const void *pComment, mz_uint16 comment_size, mz_uint level_and_flags, - mz_uint64 uncomp_size, mz_uint32 uncomp_crc32); - -mz_bool mz_zip_writer_add_mem_ex_v2(mz_zip_archive *pZip, const char *pArchive_name, const void *pBuf, size_t buf_size, const void *pComment, mz_uint16 comment_size, mz_uint level_and_flags, - mz_uint64 uncomp_size, mz_uint32 uncomp_crc32, MZ_TIME_T *last_modified, const char *user_extra_data_local, mz_uint user_extra_data_local_len, - const char *user_extra_data_central, mz_uint user_extra_data_central_len); - -#ifndef MINIZ_NO_STDIO -/* Adds the contents of a disk file to an archive. This function also records the disk file's modified time into the archive. */ -/* level_and_flags - compression level (0-10, see MZ_BEST_SPEED, MZ_BEST_COMPRESSION, etc.) logically OR'd with zero or more mz_zip_flags, or just set to MZ_DEFAULT_COMPRESSION. */ -mz_bool mz_zip_writer_add_file(mz_zip_archive *pZip, const char *pArchive_name, const char *pSrc_filename, const void *pComment, mz_uint16 comment_size, mz_uint level_and_flags); - -/* Like mz_zip_writer_add_file(), except the file data is read from the specified FILE stream. */ -mz_bool mz_zip_writer_add_cfile(mz_zip_archive *pZip, const char *pArchive_name, MZ_FILE *pSrc_file, mz_uint64 size_to_add, - const MZ_TIME_T *pFile_time, const void *pComment, mz_uint16 comment_size, mz_uint level_and_flags, const char *user_extra_data_local, mz_uint user_extra_data_local_len, - const char *user_extra_data_central, mz_uint user_extra_data_central_len); -#endif - -/* Adds a file to an archive by fully cloning the data from another archive. */ -/* This function fully clones the source file's compressed data (no recompression), along with its full filename, extra data (it may add or modify the zip64 local header extra data field), and the optional descriptor following the compressed data. */ -mz_bool mz_zip_writer_add_from_zip_reader(mz_zip_archive *pZip, mz_zip_archive *pSource_zip, mz_uint src_file_index); - -/* Finalizes the archive by writing the central directory records followed by the end of central directory record. */ -/* After an archive is finalized, the only valid call on the mz_zip_archive struct is mz_zip_writer_end(). */ -/* An archive must be manually finalized by calling this function for it to be valid. */ -mz_bool mz_zip_writer_finalize_archive(mz_zip_archive *pZip); - -/* Finalizes a heap archive, returning a poiner to the heap block and its size. */ -/* The heap block will be allocated using the mz_zip_archive's alloc/realloc callbacks. */ -mz_bool mz_zip_writer_finalize_heap_archive(mz_zip_archive *pZip, void **ppBuf, size_t *pSize); - -/* Ends archive writing, freeing all allocations, and closing the output file if mz_zip_writer_init_file() was used. */ -/* Note for the archive to be valid, it *must* have been finalized before ending (this function will not do it for you). */ -mz_bool mz_zip_writer_end(mz_zip_archive *pZip); - -/* -------- Misc. high-level helper functions: */ - -/* mz_zip_add_mem_to_archive_file_in_place() efficiently (but not atomically) appends a memory blob to a ZIP archive. */ -/* Note this is NOT a fully safe operation. If it crashes or dies in some way your archive can be left in a screwed up state (without a central directory). */ -/* level_and_flags - compression level (0-10, see MZ_BEST_SPEED, MZ_BEST_COMPRESSION, etc.) logically OR'd with zero or more mz_zip_flags, or just set to MZ_DEFAULT_COMPRESSION. */ -/* TODO: Perhaps add an option to leave the existing central dir in place in case the add dies? We could then truncate the file (so the old central dir would be at the end) if something goes wrong. */ -mz_bool mz_zip_add_mem_to_archive_file_in_place(const char *pZip_filename, const char *pArchive_name, const void *pBuf, size_t buf_size, const void *pComment, mz_uint16 comment_size, mz_uint level_and_flags); -mz_bool mz_zip_add_mem_to_archive_file_in_place_v2(const char *pZip_filename, const char *pArchive_name, const void *pBuf, size_t buf_size, const void *pComment, mz_uint16 comment_size, mz_uint level_and_flags, mz_zip_error *pErr); - -/* Reads a single file from an archive into a heap block. */ -/* If pComment is not NULL, only the file with the specified comment will be extracted. */ -/* Returns NULL on failure. */ -void *mz_zip_extract_archive_file_to_heap(const char *pZip_filename, const char *pArchive_name, size_t *pSize, mz_uint flags); -void *mz_zip_extract_archive_file_to_heap_v2(const char *pZip_filename, const char *pArchive_name, const char *pComment, size_t *pSize, mz_uint flags, mz_zip_error *pErr); - -#endif /* #ifndef MINIZ_NO_ARCHIVE_WRITING_APIS */ - - - -#endif /* MINIZ_NO_ARCHIVE_APIS */ - -} // namespace duckdb_miniz - - -// LICENSE_CHANGE_END - -#include -#include - -namespace duckdb { - -enum class MiniZStreamType { - MINIZ_TYPE_NONE, - MINIZ_TYPE_INFLATE, - MINIZ_TYPE_DEFLATE -}; - -struct MiniZStream { - static constexpr uint8_t GZIP_HEADER_MINSIZE = 10; - static constexpr uint8_t GZIP_FOOTER_SIZE = 8; - static constexpr uint8_t GZIP_COMPRESSION_DEFLATE = 0x08; - static constexpr unsigned char GZIP_FLAG_UNSUPPORTED = 0x1 | 0x2 | 0x4 | 0x10 | 0x20; - -public: - MiniZStream() : type(MiniZStreamType::MINIZ_TYPE_NONE) { - memset(&stream, 0, sizeof(duckdb_miniz::mz_stream)); - } - ~MiniZStream() { - switch(type) { - case MiniZStreamType::MINIZ_TYPE_INFLATE: - duckdb_miniz::mz_inflateEnd(&stream); - break; - case MiniZStreamType::MINIZ_TYPE_DEFLATE: - duckdb_miniz::mz_deflateEnd(&stream); - break; - default: - break; - } - } - void FormatException(std::string error_msg) { - throw std::runtime_error(error_msg); - } - void FormatException(const char *error_msg, int mz_ret) { - auto err = duckdb_miniz::mz_error(mz_ret); - FormatException(error_msg + std::string(": ") + (err ? err : "Unknown error code")); - } - void Decompress(const char *compressed_data, size_t compressed_size, char *out_data, size_t out_size) { - auto mz_ret = mz_inflateInit2(&stream, -MZ_DEFAULT_WINDOW_BITS); - if (mz_ret != duckdb_miniz::MZ_OK) { - FormatException("Failed to initialize miniz", mz_ret); - } - type = MiniZStreamType::MINIZ_TYPE_INFLATE; - - if (compressed_size < GZIP_HEADER_MINSIZE) { - FormatException("Failed to decompress GZIP block: compressed size is less than gzip header size"); - } - auto gzip_hdr = (const unsigned char *)compressed_data; - if (gzip_hdr[0] != 0x1F || gzip_hdr[1] != 0x8B || gzip_hdr[2] != GZIP_COMPRESSION_DEFLATE || - gzip_hdr[3] & GZIP_FLAG_UNSUPPORTED) { - FormatException("Input is invalid/unsupported GZIP stream"); - } - - stream.next_in = (const unsigned char *)compressed_data + GZIP_HEADER_MINSIZE; - stream.avail_in = compressed_size - GZIP_HEADER_MINSIZE; - stream.next_out = (unsigned char *)out_data; - stream.avail_out = out_size; - - mz_ret = mz_inflate(&stream, duckdb_miniz::MZ_FINISH); - if (mz_ret != duckdb_miniz::MZ_OK && mz_ret != duckdb_miniz::MZ_STREAM_END) { - FormatException("Failed to decompress GZIP block", mz_ret); - } - } - size_t MaxCompressedLength(size_t input_size) { - return duckdb_miniz::mz_compressBound(input_size) + GZIP_HEADER_MINSIZE + GZIP_FOOTER_SIZE; - } - static void InitializeGZIPHeader(unsigned char *gzip_header) { - memset(gzip_header, 0, GZIP_HEADER_MINSIZE); - gzip_header[0] = 0x1F; - gzip_header[1] = 0x8B; - gzip_header[2] = GZIP_COMPRESSION_DEFLATE; - gzip_header[3] = 0; - gzip_header[4] = 0; - gzip_header[5] = 0; - gzip_header[6] = 0; - gzip_header[7] = 0; - gzip_header[8] = 0; - gzip_header[9] = 0xFF; - } - - static void InitializeGZIPFooter(unsigned char *gzip_footer, duckdb_miniz::mz_ulong crc, idx_t uncompressed_size) { - gzip_footer[0] = crc & 0xFF; - gzip_footer[1] = (crc >> 8) & 0xFF; - gzip_footer[2] = (crc >> 16) & 0xFF; - gzip_footer[3] = (crc >> 24) & 0xFF; - gzip_footer[4] = uncompressed_size & 0xFF; - gzip_footer[5] = (uncompressed_size >> 8) & 0xFF; - gzip_footer[6] = (uncompressed_size >> 16) & 0xFF; - gzip_footer[7] = (uncompressed_size >> 24) & 0xFF; - } - - void Compress(const char *uncompressed_data, size_t uncompressed_size, char *out_data, size_t *out_size) { - auto mz_ret = mz_deflateInit2(&stream, duckdb_miniz::MZ_DEFAULT_LEVEL, MZ_DEFLATED, -MZ_DEFAULT_WINDOW_BITS, 1, 0); - if (mz_ret != duckdb_miniz::MZ_OK) { - FormatException("Failed to initialize miniz", mz_ret); - } - type = MiniZStreamType::MINIZ_TYPE_DEFLATE; - - auto gzip_header = (unsigned char*) out_data; - InitializeGZIPHeader(gzip_header); - - auto gzip_body = gzip_header + GZIP_HEADER_MINSIZE; - - stream.next_in = (const unsigned char*) uncompressed_data; - stream.avail_in = uncompressed_size; - stream.next_out = gzip_body; - stream.avail_out = *out_size - GZIP_HEADER_MINSIZE; - - mz_ret = mz_deflate(&stream, duckdb_miniz::MZ_FINISH); - if (mz_ret != duckdb_miniz::MZ_OK && mz_ret != duckdb_miniz::MZ_STREAM_END) { - FormatException("Failed to compress GZIP block", mz_ret); - } - auto gzip_footer = gzip_body + stream.total_out; - auto crc = duckdb_miniz::mz_crc32(MZ_CRC32_INIT, (const unsigned char*) uncompressed_data, uncompressed_size); - InitializeGZIPFooter(gzip_footer, crc, uncompressed_size); - - *out_size = stream.total_out + GZIP_HEADER_MINSIZE + GZIP_FOOTER_SIZE; - } - -private: - duckdb_miniz::mz_stream stream; - MiniZStreamType type; -}; - -} - - -// LICENSE_CHANGE_END - - -#include - -#include "duckdb.hpp" -#ifndef DUCKDB_AMALGAMATION -#include "duckdb/common/types/blob.hpp" -#include "duckdb/common/types/chunk_collection.hpp" -#endif - -namespace duckdb { - -using duckdb_parquet::format::CompressionCodec; -using duckdb_parquet::format::ConvertedType; -using duckdb_parquet::format::Encoding; -using duckdb_parquet::format::PageType; -using duckdb_parquet::format::Type; - -const uint32_t ParquetDecodeUtils::BITPACK_MASKS[] = { - 0, 1, 3, 7, 15, 31, 63, 127, 255, 511, 1023, - 2047, 4095, 8191, 16383, 32767, 65535, 131071, 262143, 524287, 1048575, 2097151, - 4194303, 8388607, 16777215, 33554431, 67108863, 134217727, 268435455, 536870911, 1073741823, 2147483647}; - -const uint8_t ParquetDecodeUtils::BITPACK_DLEN = 8; - -ColumnReader::ColumnReader(ParquetReader &reader, LogicalType type_p, const SchemaElement &schema_p, idx_t file_idx_p, - idx_t max_define_p, idx_t max_repeat_p) - : schema(schema_p), file_idx(file_idx_p), max_define(max_define_p), max_repeat(max_repeat_p), reader(reader), - type(move(type_p)), page_rows_available(0) { - - // dummies for Skip() - none_filter.none(); - dummy_define.resize(reader.allocator, STANDARD_VECTOR_SIZE); - dummy_repeat.resize(reader.allocator, STANDARD_VECTOR_SIZE); -} - -ColumnReader::~ColumnReader() { -} - -ParquetReader &ColumnReader::Reader() { - return reader; -} - -const LogicalType &ColumnReader::Type() const { - return type; -} - -const SchemaElement &ColumnReader::Schema() const { - return schema; -} - -idx_t ColumnReader::FileIdx() const { - return file_idx; -} - -idx_t ColumnReader::MaxDefine() const { - return max_define; -} - -idx_t ColumnReader::MaxRepeat() const { - return max_repeat; -} - -void ColumnReader::RegisterPrefetch(ThriftFileTransport &transport, bool allow_merge) { - if (chunk) { - uint64_t size = chunk->meta_data.total_compressed_size; - transport.RegisterPrefetch(FileOffset(), size, allow_merge); - } -} - -uint64_t ColumnReader::TotalCompressedSize() { - if (!chunk) { - return 0; - } - - return chunk->meta_data.total_compressed_size; -} - -// Note: It's not trivial to determine where all Column data is stored. Chunk->file_offset -// apparently is not the first page of the data. Therefore we determine the address of the first page by taking the -// minimum of all page offsets. -idx_t ColumnReader::FileOffset() const { - if (!chunk) { - throw std::runtime_error("FileOffset called on ColumnReader with no chunk"); - } - auto min_offset = NumericLimits::Maximum(); - if (chunk->meta_data.__isset.dictionary_page_offset) { - min_offset = MinValue(min_offset, chunk->meta_data.dictionary_page_offset); - } - if (chunk->meta_data.__isset.index_page_offset) { - min_offset = MinValue(min_offset, chunk->meta_data.index_page_offset); - } - min_offset = MinValue(min_offset, chunk->meta_data.data_page_offset); - - return min_offset; -} - -idx_t ColumnReader::GroupRowsAvailable() { - return group_rows_available; -} - -unique_ptr ColumnReader::Stats(const std::vector &columns) { - if (Type().id() == LogicalTypeId::LIST || Type().id() == LogicalTypeId::STRUCT || - Type().id() == LogicalTypeId::MAP) { - return nullptr; - } - return ParquetStatisticsUtils::TransformColumnStatistics(Schema(), Type(), columns[file_idx]); -} - -void ColumnReader::Plain(shared_ptr plain_data, uint8_t *defines, idx_t num_values, // NOLINT - parquet_filter_t &filter, idx_t result_offset, Vector &result) { - throw NotImplementedException("Plain"); -} - -void ColumnReader::Dictionary(shared_ptr dictionary_data, idx_t num_entries) { // NOLINT - throw NotImplementedException("Dictionary"); -} - -void ColumnReader::Offsets(uint32_t *offsets, uint8_t *defines, idx_t num_values, parquet_filter_t &filter, - idx_t result_offset, Vector &result) { - throw NotImplementedException("Offsets"); -} - -void ColumnReader::DictReference(Vector &result) { -} -void ColumnReader::PlainReference(shared_ptr, Vector &result) { // NOLINT -} - -void ColumnReader::InitializeRead(const std::vector &columns, TProtocol &protocol_p) { - D_ASSERT(file_idx < columns.size()); - chunk = &columns[file_idx]; - protocol = &protocol_p; - D_ASSERT(chunk); - D_ASSERT(chunk->__isset.meta_data); - - if (chunk->__isset.file_path) { - throw std::runtime_error("Only inlined data files are supported (no references)"); - } - - // ugh. sometimes there is an extra offset for the dict. sometimes it's wrong. - chunk_read_offset = chunk->meta_data.data_page_offset; - if (chunk->meta_data.__isset.dictionary_page_offset && chunk->meta_data.dictionary_page_offset >= 4) { - // this assumes the data pages follow the dict pages directly. - chunk_read_offset = chunk->meta_data.dictionary_page_offset; - } - group_rows_available = chunk->meta_data.num_values; -} - -void ColumnReader::PrepareRead(parquet_filter_t &filter) { - dict_decoder.reset(); - defined_decoder.reset(); - block.reset(); - - PageHeader page_hdr; - page_hdr.read(protocol); - - switch (page_hdr.type) { - case PageType::DATA_PAGE_V2: - PreparePageV2(page_hdr); - PrepareDataPage(page_hdr); - break; - case PageType::DATA_PAGE: - PreparePage(page_hdr.compressed_page_size, page_hdr.uncompressed_page_size); - PrepareDataPage(page_hdr); - break; - case PageType::DICTIONARY_PAGE: - PreparePage(page_hdr.compressed_page_size, page_hdr.uncompressed_page_size); - Dictionary(move(block), page_hdr.dictionary_page_header.num_values); - break; - default: - break; // ignore INDEX page type and any other custom extensions - } -} - -void ColumnReader::PreparePageV2(PageHeader &page_hdr) { - // FIXME this is copied from the other prepare, merge the decomp part - - D_ASSERT(page_hdr.type == PageType::DATA_PAGE_V2); - - auto &trans = (ThriftFileTransport &)*protocol->getTransport(); - - block = make_shared(reader.allocator, page_hdr.uncompressed_page_size + 1); - // copy repeats & defines as-is because FOR SOME REASON they are uncompressed - auto uncompressed_bytes = page_hdr.data_page_header_v2.repetition_levels_byte_length + - page_hdr.data_page_header_v2.definition_levels_byte_length; - auto possibly_compressed_bytes = page_hdr.compressed_page_size - uncompressed_bytes; - trans.read((uint8_t *)block->ptr, uncompressed_bytes); - - switch (chunk->meta_data.codec) { - case CompressionCodec::UNCOMPRESSED: - trans.read(((uint8_t *)block->ptr) + uncompressed_bytes, possibly_compressed_bytes); - break; - - case CompressionCodec::SNAPPY: { - // TODO move allocation outta here - ResizeableBuffer compressed_bytes_buffer(reader.allocator, possibly_compressed_bytes); - trans.read((uint8_t *)compressed_bytes_buffer.ptr, possibly_compressed_bytes); - - auto res = duckdb_snappy::RawUncompress((const char *)compressed_bytes_buffer.ptr, possibly_compressed_bytes, - ((char *)block->ptr) + uncompressed_bytes); - if (!res) { - throw std::runtime_error("Decompression failure"); - } - break; - } - - default: { - std::stringstream codec_name; - codec_name << chunk->meta_data.codec; - throw std::runtime_error("Unsupported compression codec \"" + codec_name.str() + - "\". Supported options are uncompressed, gzip or snappy"); - break; - } - } -} - -void ColumnReader::PreparePage(idx_t compressed_page_size, idx_t uncompressed_page_size) { - auto &trans = (ThriftFileTransport &)*protocol->getTransport(); - - block = make_shared(reader.allocator, compressed_page_size + 1); - trans.read((uint8_t *)block->ptr, compressed_page_size); - - // TODO this allocation should probably be avoided - shared_ptr unpacked_block; - if (chunk->meta_data.codec != CompressionCodec::UNCOMPRESSED) { - unpacked_block = make_shared(reader.allocator, uncompressed_page_size + 1); - } - - switch (chunk->meta_data.codec) { - case CompressionCodec::UNCOMPRESSED: - break; - case CompressionCodec::GZIP: { - MiniZStream s; - - s.Decompress((const char *)block->ptr, compressed_page_size, (char *)unpacked_block->ptr, - uncompressed_page_size); - block = move(unpacked_block); - - break; - } - case CompressionCodec::SNAPPY: { - auto res = - duckdb_snappy::RawUncompress((const char *)block->ptr, compressed_page_size, (char *)unpacked_block->ptr); - if (!res) { - throw std::runtime_error("Decompression failure"); - } - block = move(unpacked_block); - break; - } - case CompressionCodec::ZSTD: { - auto res = duckdb_zstd::ZSTD_decompress((char *)unpacked_block->ptr, uncompressed_page_size, - (const char *)block->ptr, compressed_page_size); - if (duckdb_zstd::ZSTD_isError(res) || res != (size_t)uncompressed_page_size) { - throw std::runtime_error("ZSTD Decompression failure"); - } - block = move(unpacked_block); - break; - } - - default: { - std::stringstream codec_name; - codec_name << chunk->meta_data.codec; - throw std::runtime_error("Unsupported compression codec \"" + codec_name.str() + - "\". Supported options are uncompressed, gzip or snappy"); - break; - } - } -} - -void ColumnReader::PrepareDataPage(PageHeader &page_hdr) { - if (page_hdr.type == PageType::DATA_PAGE && !page_hdr.__isset.data_page_header) { - throw std::runtime_error("Missing data page header from data page"); - } - if (page_hdr.type == PageType::DATA_PAGE_V2 && !page_hdr.__isset.data_page_header_v2) { - throw std::runtime_error("Missing data page header from data page v2"); - } - - page_rows_available = page_hdr.type == PageType::DATA_PAGE ? page_hdr.data_page_header.num_values - : page_hdr.data_page_header_v2.num_values; - auto page_encoding = page_hdr.type == PageType::DATA_PAGE ? page_hdr.data_page_header.encoding - : page_hdr.data_page_header_v2.encoding; - - if (HasRepeats()) { - uint32_t rep_length = page_hdr.type == PageType::DATA_PAGE - ? block->read() - : page_hdr.data_page_header_v2.repetition_levels_byte_length; - block->available(rep_length); - repeated_decoder = make_unique((const uint8_t *)block->ptr, rep_length, - RleBpDecoder::ComputeBitWidth(max_repeat)); - block->inc(rep_length); - } - - if (HasDefines()) { - uint32_t def_length = page_hdr.type == PageType::DATA_PAGE - ? block->read() - : page_hdr.data_page_header_v2.definition_levels_byte_length; - block->available(def_length); - defined_decoder = make_unique((const uint8_t *)block->ptr, def_length, - RleBpDecoder::ComputeBitWidth(max_define)); - block->inc(def_length); - } - - switch (page_encoding) { - case Encoding::RLE_DICTIONARY: - case Encoding::PLAIN_DICTIONARY: { - // where is it otherwise?? - auto dict_width = block->read(); - // TODO somehow dict_width can be 0 ? - dict_decoder = make_unique((const uint8_t *)block->ptr, block->len, dict_width); - block->inc(block->len); - break; - } - case Encoding::DELTA_BINARY_PACKED: { - dbp_decoder = make_unique((const uint8_t *)block->ptr, block->len); - block->inc(block->len); - break; - } - /* - case Encoding::DELTA_BYTE_ARRAY: { - dbp_decoder = make_unique((const uint8_t *)block->ptr, block->len); - auto prefix_buffer = make_shared(); - prefix_buffer->resize(reader.allocator, sizeof(uint32_t) * page_hdr.data_page_header_v2.num_rows); - - auto suffix_buffer = make_shared(); - suffix_buffer->resize(reader.allocator, sizeof(uint32_t) * page_hdr.data_page_header_v2.num_rows); - - dbp_decoder->GetBatch(prefix_buffer->ptr, page_hdr.data_page_header_v2.num_rows); - auto buffer_after_prefixes = dbp_decoder->BufferPtr(); - - dbp_decoder = make_unique((const uint8_t *)buffer_after_prefixes.ptr, buffer_after_prefixes.len); - dbp_decoder->GetBatch(suffix_buffer->ptr, page_hdr.data_page_header_v2.num_rows); - - auto string_buffer = dbp_decoder->BufferPtr(); - - for (idx_t i = 0 ; i < page_hdr.data_page_header_v2.num_rows; i++) { - auto suffix_length = (uint32_t*) suffix_buffer->ptr; - string str( suffix_length[i] + 1, '\0'); - string_buffer.copy_to((char*) str.data(), suffix_length[i]); - printf("%s\n", str.c_str()); - } - throw std::runtime_error("eek"); - - - // This is also known as incremental encoding or front compression: for each element in a sequence of strings, - // store the prefix length of the previous entry plus the suffix. This is stored as a sequence of delta-encoded - // prefix lengths (DELTA_BINARY_PACKED), followed by the suffixes encoded as delta length byte arrays - // (DELTA_LENGTH_BYTE_ARRAY). DELTA_LENGTH_BYTE_ARRAY: The encoded data would be DeltaEncoding(5, 5, 6, 6) - // "HelloWorldFoobarABCDEF" - - // TODO actually do something here - break; - } - */ - case Encoding::PLAIN: - // nothing to do here, will be read directly below - break; - - default: - throw std::runtime_error("Unsupported page encoding"); - } -} - -idx_t ColumnReader::Read(uint64_t num_values, parquet_filter_t &filter, uint8_t *define_out, uint8_t *repeat_out, - Vector &result) { - // we need to reset the location because multiple column readers share the same protocol - auto &trans = (ThriftFileTransport &)*protocol->getTransport(); - trans.SetLocation(chunk_read_offset); - - // Perform any skips that were not applied yet. - if (pending_skips > 0) { - ApplyPendingSkips(pending_skips); - } - - idx_t result_offset = 0; - auto to_read = num_values; - - while (to_read > 0) { - while (page_rows_available == 0) { - PrepareRead(filter); - } - - D_ASSERT(block); - auto read_now = MinValue(to_read, page_rows_available); - - D_ASSERT(read_now <= STANDARD_VECTOR_SIZE); - - if (HasRepeats()) { - D_ASSERT(repeated_decoder); - repeated_decoder->GetBatch((char *)repeat_out + result_offset, read_now); - } - - if (HasDefines()) { - D_ASSERT(defined_decoder); - defined_decoder->GetBatch((char *)define_out + result_offset, read_now); - } - - idx_t null_count = 0; - - if ((dict_decoder || dbp_decoder) && HasDefines()) { - // we need the null count because the dictionary offsets have no entries for nulls - for (idx_t i = 0; i < read_now; i++) { - if (define_out[i + result_offset] != max_define) { - null_count++; - } - } - } - - if (dict_decoder) { - offset_buffer.resize(reader.allocator, sizeof(uint32_t) * (read_now - null_count)); - dict_decoder->GetBatch(offset_buffer.ptr, read_now - null_count); - DictReference(result); - Offsets((uint32_t *)offset_buffer.ptr, define_out, read_now, filter, result_offset, result); - } else if (dbp_decoder) { - // TODO keep this in the state - auto read_buf = make_shared(); - - switch (type.id()) { - case LogicalTypeId::INTEGER: - read_buf->resize(reader.allocator, sizeof(int32_t) * (read_now - null_count)); - dbp_decoder->GetBatch(read_buf->ptr, read_now - null_count); - - break; - case LogicalTypeId::BIGINT: - read_buf->resize(reader.allocator, sizeof(int64_t) * (read_now - null_count)); - dbp_decoder->GetBatch(read_buf->ptr, read_now - null_count); - break; - - default: - throw std::runtime_error("DELTA_BINARY_PACKED should only be INT32 or INT64"); - } - // Plain() will put NULLs in the right place - Plain(read_buf, define_out, read_now, filter, result_offset, result); - } else { - PlainReference(block, result); - Plain(block, define_out, read_now, filter, result_offset, result); - } - - result_offset += read_now; - page_rows_available -= read_now; - to_read -= read_now; - } - group_rows_available -= num_values; - chunk_read_offset = trans.GetLocation(); - - return num_values; -} - -void ColumnReader::Skip(idx_t num_values) { - pending_skips += num_values; -} - -void ColumnReader::ApplyPendingSkips(idx_t num_values) { - pending_skips -= num_values; - - dummy_define.zero(); - dummy_repeat.zero(); - - // TODO this can be optimized, for example we dont actually have to bitunpack offsets - Vector dummy_result(type, nullptr); - - idx_t remaining = num_values; - idx_t read = 0; - - while (remaining) { - idx_t to_read = MinValue(remaining, STANDARD_VECTOR_SIZE); - read += Read(to_read, none_filter, (uint8_t *)dummy_define.ptr, (uint8_t *)dummy_repeat.ptr, dummy_result); - remaining -= to_read; - } - - if (read != num_values) { - throw std::runtime_error("Row count mismatch when skipping rows"); - } -} - -//===--------------------------------------------------------------------===// -// String Column Reader -//===--------------------------------------------------------------------===// -StringColumnReader::StringColumnReader(ParquetReader &reader, LogicalType type_p, const SchemaElement &schema_p, - idx_t schema_idx_p, idx_t max_define_p, idx_t max_repeat_p) - : TemplatedColumnReader(reader, move(type_p), schema_p, schema_idx_p, - max_define_p, max_repeat_p) { - fixed_width_string_length = 0; - if (schema_p.type == Type::FIXED_LEN_BYTE_ARRAY) { - D_ASSERT(schema_p.__isset.type_length); - fixed_width_string_length = schema_p.type_length; - } -} - -uint32_t StringColumnReader::VerifyString(const char *str_data, uint32_t str_len) { - if (Type() != LogicalTypeId::VARCHAR) { - return str_len; - } - // verify if a string is actually UTF8, and if there are no null bytes in the middle of the string - // technically Parquet should guarantee this, but reality is often disappointing - UnicodeInvalidReason reason; - size_t pos; - auto utf_type = Utf8Proc::Analyze(str_data, str_len, &reason, &pos); - if (utf_type == UnicodeType::INVALID) { - if (reason == UnicodeInvalidReason::NULL_BYTE) { - // for null bytes we just truncate the string - return pos; - } - throw InvalidInputException("Invalid string encoding found in Parquet file: value \"" + - Blob::ToString(string_t(str_data, str_len)) + "\" is not valid UTF8!"); - } - return str_len; -} - -void StringColumnReader::Dictionary(shared_ptr data, idx_t num_entries) { - dict = move(data); - dict_strings = unique_ptr(new string_t[num_entries]); - for (idx_t dict_idx = 0; dict_idx < num_entries; dict_idx++) { - uint32_t str_len; - if (fixed_width_string_length == 0) { - // variable length string: read from dictionary - str_len = dict->read(); - } else { - // fixed length string - str_len = fixed_width_string_length; - } - dict->available(str_len); - - auto actual_str_len = VerifyString(dict->ptr, str_len); - dict_strings[dict_idx] = string_t(dict->ptr, actual_str_len); - dict->inc(str_len); - } -} - -class ParquetStringVectorBuffer : public VectorBuffer { -public: - explicit ParquetStringVectorBuffer(shared_ptr buffer_p) - : VectorBuffer(VectorBufferType::OPAQUE_BUFFER), buffer(move(buffer_p)) { - } - -private: - shared_ptr buffer; -}; - -void StringColumnReader::DictReference(Vector &result) { - StringVector::AddBuffer(result, make_buffer(dict)); -} -void StringColumnReader::PlainReference(shared_ptr plain_data, Vector &result) { - StringVector::AddBuffer(result, make_buffer(move(plain_data))); -} - -string_t StringParquetValueConversion::DictRead(ByteBuffer &dict, uint32_t &offset, ColumnReader &reader) { - auto &dict_strings = ((StringColumnReader &)reader).dict_strings; - return dict_strings[offset]; -} - -string_t StringParquetValueConversion::PlainRead(ByteBuffer &plain_data, ColumnReader &reader) { - auto &scr = ((StringColumnReader &)reader); - uint32_t str_len = scr.fixed_width_string_length == 0 ? plain_data.read() : scr.fixed_width_string_length; - plain_data.available(str_len); - auto actual_str_len = ((StringColumnReader &)reader).VerifyString(plain_data.ptr, str_len); - auto ret_str = string_t(plain_data.ptr, actual_str_len); - plain_data.inc(str_len); - return ret_str; -} - -void StringParquetValueConversion::PlainSkip(ByteBuffer &plain_data, ColumnReader &reader) { - auto &scr = ((StringColumnReader &)reader); - uint32_t str_len = scr.fixed_width_string_length == 0 ? plain_data.read() : scr.fixed_width_string_length; - plain_data.inc(str_len); -} - -//===--------------------------------------------------------------------===// -// List Column Reader -//===--------------------------------------------------------------------===// -idx_t ListColumnReader::Read(uint64_t num_values, parquet_filter_t &filter, uint8_t *define_out, uint8_t *repeat_out, - Vector &result_out) { - idx_t result_offset = 0; - auto result_ptr = FlatVector::GetData(result_out); - auto &result_mask = FlatVector::Validity(result_out); - - if (pending_skips > 0) { - ApplyPendingSkips(pending_skips); - } - - D_ASSERT(ListVector::GetListSize(result_out) == 0); - // if an individual list is longer than STANDARD_VECTOR_SIZE we actually have to loop the child read to fill it - bool finished = false; - while (!finished) { - idx_t child_actual_num_values = 0; - - // check if we have any overflow from a previous read - if (overflow_child_count == 0) { - // we don't: read elements from the child reader - child_defines.zero(); - child_repeats.zero(); - // we don't know in advance how many values to read because of the beautiful repetition/definition setup - // we just read (up to) a vector from the child column, and see if we have read enough - // if we have not read enough, we read another vector - // if we have read enough, we leave any unhandled elements in the overflow vector for a subsequent read - auto child_req_num_values = - MinValue(STANDARD_VECTOR_SIZE, child_column_reader->GroupRowsAvailable()); - read_vector.ResetFromCache(read_cache); - child_actual_num_values = child_column_reader->Read(child_req_num_values, child_filter, child_defines_ptr, - child_repeats_ptr, read_vector); - } else { - // we do: use the overflow values - child_actual_num_values = overflow_child_count; - overflow_child_count = 0; - } - - if (child_actual_num_values == 0) { - // no more elements available: we are done - break; - } - read_vector.Verify(child_actual_num_values); - idx_t current_chunk_offset = ListVector::GetListSize(result_out); - - // hard-won piece of code this, modify at your own risk - // the intuition is that we have to only collapse values into lists that are repeated *on this level* - // the rest is pretty much handed up as-is as a single-valued list or NULL - idx_t child_idx; - for (child_idx = 0; child_idx < child_actual_num_values; child_idx++) { - if (child_repeats_ptr[child_idx] == max_repeat) { - // value repeats on this level, append - D_ASSERT(result_offset > 0); - result_ptr[result_offset - 1].length++; - continue; - } - - if (result_offset >= num_values) { - // we ran out of output space - finished = true; - break; - } - if (child_defines_ptr[child_idx] >= max_define) { - // value has been defined down the stack, hence its NOT NULL - result_ptr[result_offset].offset = child_idx + current_chunk_offset; - result_ptr[result_offset].length = 1; - } else if (child_defines_ptr[child_idx] == max_define - 1) { - // empty list - result_ptr[result_offset].offset = child_idx + current_chunk_offset; - result_ptr[result_offset].length = 0; - } else { - // value is NULL somewhere up the stack - result_mask.SetInvalid(result_offset); - result_ptr[result_offset].offset = 0; - result_ptr[result_offset].length = 0; - } - - repeat_out[result_offset] = child_repeats_ptr[child_idx]; - define_out[result_offset] = child_defines_ptr[child_idx]; - - result_offset++; - } - // actually append the required elements to the child list - ListVector::Append(result_out, read_vector, child_idx); - - // we have read more values from the child reader than we can fit into the result for this read - // we have to pass everything from child_idx to child_actual_num_values into the next call - if (child_idx < child_actual_num_values && result_offset == num_values) { - read_vector.Slice(read_vector, child_idx); - overflow_child_count = child_actual_num_values - child_idx; - read_vector.Verify(overflow_child_count); - - // move values in the child repeats and defines *backward* by child_idx - for (idx_t repdef_idx = 0; repdef_idx < overflow_child_count; repdef_idx++) { - child_defines_ptr[repdef_idx] = child_defines_ptr[child_idx + repdef_idx]; - child_repeats_ptr[repdef_idx] = child_repeats_ptr[child_idx + repdef_idx]; - } - } - } - result_out.Verify(result_offset); - return result_offset; -} - -ListColumnReader::ListColumnReader(ParquetReader &reader, LogicalType type_p, const SchemaElement &schema_p, - idx_t schema_idx_p, idx_t max_define_p, idx_t max_repeat_p, - unique_ptr child_column_reader_p) - : ColumnReader(reader, move(type_p), schema_p, schema_idx_p, max_define_p, max_repeat_p), - child_column_reader(move(child_column_reader_p)), read_cache(reader.allocator, ListType::GetChildType(Type())), - read_vector(read_cache), overflow_child_count(0) { - - child_defines.resize(reader.allocator, STANDARD_VECTOR_SIZE); - child_repeats.resize(reader.allocator, STANDARD_VECTOR_SIZE); - child_defines_ptr = (uint8_t *)child_defines.ptr; - child_repeats_ptr = (uint8_t *)child_repeats.ptr; - - child_filter.set(); -} - -void ListColumnReader::ApplyPendingSkips(idx_t num_values) { - pending_skips -= num_values; - - auto define_out = unique_ptr(new uint8_t[num_values]); - auto repeat_out = unique_ptr(new uint8_t[num_values]); - - idx_t remaining = num_values; - idx_t read = 0; - - while (remaining) { - Vector result_out(Type()); - parquet_filter_t filter; - idx_t to_read = MinValue(remaining, STANDARD_VECTOR_SIZE); - read += Read(to_read, filter, define_out.get(), repeat_out.get(), result_out); - remaining -= to_read; - } - - if (read != num_values) { - throw InternalException("Not all skips done!"); - } -} -//===--------------------------------------------------------------------===// -// Cast Column Reader -//===--------------------------------------------------------------------===// -CastColumnReader::CastColumnReader(unique_ptr child_reader_p, LogicalType target_type_p) - : ColumnReader(child_reader_p->Reader(), move(target_type_p), child_reader_p->Schema(), child_reader_p->FileIdx(), - child_reader_p->MaxDefine(), child_reader_p->MaxRepeat()), - child_reader(move(child_reader_p)) { - vector intermediate_types {child_reader->Type()}; - intermediate_chunk.Initialize(reader.allocator, intermediate_types); -} - -unique_ptr CastColumnReader::Stats(const std::vector &columns) { - // casting stats is not supported (yet) - return nullptr; -} - -void CastColumnReader::InitializeRead(const std::vector &columns, TProtocol &protocol_p) { - child_reader->InitializeRead(columns, protocol_p); -} - -idx_t CastColumnReader::Read(uint64_t num_values, parquet_filter_t &filter, uint8_t *define_out, uint8_t *repeat_out, - Vector &result) { - intermediate_chunk.Reset(); - auto &intermediate_vector = intermediate_chunk.data[0]; - - auto amount = child_reader->Read(num_values, filter, define_out, repeat_out, intermediate_vector); - if (!filter.all()) { - // work-around for filters: set all values that are filtered to NULL to prevent the cast from failing on - // uninitialized data - intermediate_vector.Normalify(amount); - auto &validity = FlatVector::Validity(intermediate_vector); - for (idx_t i = 0; i < amount; i++) { - if (!filter[i]) { - validity.SetInvalid(i); - } - } - } - VectorOperations::Cast(intermediate_vector, result, amount); - return amount; -} - -void CastColumnReader::Skip(idx_t num_values) { - child_reader->Skip(num_values); -} - -idx_t CastColumnReader::GroupRowsAvailable() { - return child_reader->GroupRowsAvailable(); -} - -//===--------------------------------------------------------------------===// -// Struct Column Reader -//===--------------------------------------------------------------------===// -StructColumnReader::StructColumnReader(ParquetReader &reader, LogicalType type_p, const SchemaElement &schema_p, - idx_t schema_idx_p, idx_t max_define_p, idx_t max_repeat_p, - vector> child_readers_p) - : ColumnReader(reader, move(type_p), schema_p, schema_idx_p, max_define_p, max_repeat_p), - child_readers(move(child_readers_p)) { - D_ASSERT(type.InternalType() == PhysicalType::STRUCT); -} - -ColumnReader *StructColumnReader::GetChildReader(idx_t child_idx) { - return child_readers[child_idx].get(); -} - -void StructColumnReader::InitializeRead(const std::vector &columns, TProtocol &protocol_p) { - for (auto &child : child_readers) { - child->InitializeRead(columns, protocol_p); - } -} - -idx_t StructColumnReader::Read(uint64_t num_values, parquet_filter_t &filter, uint8_t *define_out, uint8_t *repeat_out, - Vector &result) { - auto &struct_entries = StructVector::GetEntries(result); - D_ASSERT(StructType::GetChildTypes(Type()).size() == struct_entries.size()); - - if (pending_skips > 0) { - ApplyPendingSkips(pending_skips); - } - - idx_t read_count = num_values; - for (idx_t i = 0; i < struct_entries.size(); i++) { - auto child_num_values = child_readers[i]->Read(num_values, filter, define_out, repeat_out, *struct_entries[i]); - if (i == 0) { - read_count = child_num_values; - } else if (read_count != child_num_values) { - throw std::runtime_error("Struct child row count mismatch"); - } - } - // set the validity mask for this level - auto &validity = FlatVector::Validity(result); - for (idx_t i = 0; i < read_count; i++) { - if (define_out[i] < max_define) { - validity.SetInvalid(i); - } - } - - return read_count; -} - -void StructColumnReader::Skip(idx_t num_values) { - for (auto &child_reader : child_readers) { - child_reader->Skip(num_values); - } -} - -void StructColumnReader::RegisterPrefetch(ThriftFileTransport &transport, bool allow_merge) { - for (auto &child : child_readers) { - child->RegisterPrefetch(transport, allow_merge); - } -} - -uint64_t StructColumnReader::TotalCompressedSize() { - uint64_t size = 0; - for (auto &child : child_readers) { - size += child->TotalCompressedSize(); - } - return size; -} - -static bool TypeHasExactRowCount(const LogicalType &type) { - switch (type.id()) { - case LogicalTypeId::LIST: - case LogicalTypeId::MAP: - return false; - case LogicalTypeId::STRUCT: - for (auto &kv : StructType::GetChildTypes(type)) { - if (TypeHasExactRowCount(kv.second.id())) { - return true; - } - } - return false; - default: - return true; - } -} - -idx_t StructColumnReader::GroupRowsAvailable() { - for (idx_t i = 0; i < child_readers.size(); i++) { - if (TypeHasExactRowCount(child_readers[i]->Type())) { - return child_readers[i]->GroupRowsAvailable(); - } - } - return child_readers[0]->GroupRowsAvailable(); -} - -//===--------------------------------------------------------------------===// -// Decimal Column Reader -//===--------------------------------------------------------------------===// -template -struct DecimalParquetValueConversion { - static DUCKDB_PHYSICAL_TYPE DictRead(ByteBuffer &dict, uint32_t &offset, ColumnReader &reader) { - auto dict_ptr = (DUCKDB_PHYSICAL_TYPE *)dict.ptr; - return dict_ptr[offset]; - } - - static DUCKDB_PHYSICAL_TYPE PlainRead(ByteBuffer &plain_data, ColumnReader &reader) { - idx_t byte_len; - if (FIXED_LENGTH) { - byte_len = (idx_t)reader.Schema().type_length; /* sure, type length needs to be a signed int */ - } else { - byte_len = plain_data.read(); - } - plain_data.available(byte_len); - auto res = - ParquetDecimalUtils::ReadDecimalValue((const_data_ptr_t)plain_data.ptr, byte_len); - - plain_data.inc(byte_len); - return res; - } - - static void PlainSkip(ByteBuffer &plain_data, ColumnReader &reader) { - uint32_t decimal_len = FIXED_LENGTH ? reader.Schema().type_length : plain_data.read(); - plain_data.inc(decimal_len); - } -}; - -template -class DecimalColumnReader - : public TemplatedColumnReader> { - -public: - DecimalColumnReader(ParquetReader &reader, LogicalType type_p, const SchemaElement &schema_p, // NOLINT - idx_t file_idx_p, idx_t max_define_p, idx_t max_repeat_p) - : TemplatedColumnReader>( - reader, move(type_p), schema_p, file_idx_p, max_define_p, max_repeat_p) {}; - -protected: - void Dictionary(shared_ptr dictionary_data, idx_t num_entries) { // NOLINT - this->dict = make_shared(this->reader.allocator, num_entries * sizeof(DUCKDB_PHYSICAL_TYPE)); - auto dict_ptr = (DUCKDB_PHYSICAL_TYPE *)this->dict->ptr; - for (idx_t i = 0; i < num_entries; i++) { - dict_ptr[i] = - DecimalParquetValueConversion::PlainRead(*dictionary_data, *this); - } - } -}; - -template -static unique_ptr CreateDecimalReaderInternal(ParquetReader &reader, const LogicalType &type_p, - const SchemaElement &schema_p, idx_t file_idx_p, - idx_t max_define, idx_t max_repeat) { - switch (type_p.InternalType()) { - case PhysicalType::INT16: - return make_unique>(reader, type_p, schema_p, file_idx_p, max_define, - max_repeat); - case PhysicalType::INT32: - return make_unique>(reader, type_p, schema_p, file_idx_p, max_define, - max_repeat); - case PhysicalType::INT64: - return make_unique>(reader, type_p, schema_p, file_idx_p, max_define, - max_repeat); - case PhysicalType::INT128: - return make_unique>(reader, type_p, schema_p, file_idx_p, - max_define, max_repeat); - default: - throw InternalException("Unrecognized type for Decimal"); - } -} - -unique_ptr ParquetDecimalUtils::CreateReader(ParquetReader &reader, const LogicalType &type_p, - const SchemaElement &schema_p, idx_t file_idx_p, - idx_t max_define, idx_t max_repeat) { - if (schema_p.__isset.type_length) { - return CreateDecimalReaderInternal(reader, type_p, schema_p, file_idx_p, max_define, max_repeat); - } else { - return CreateDecimalReaderInternal(reader, type_p, schema_p, file_idx_p, max_define, max_repeat); - } -} - -//===--------------------------------------------------------------------===// -// UUID Column Reader -//===--------------------------------------------------------------------===// -struct UUIDValueConversion { - static hugeint_t DictRead(ByteBuffer &dict, uint32_t &offset, ColumnReader &reader) { - auto dict_ptr = (hugeint_t *)dict.ptr; - return dict_ptr[offset]; - } - - static hugeint_t ReadParquetUUID(const_data_ptr_t input) { - hugeint_t result; - result.lower = 0; - uint64_t unsigned_upper = 0; - for (idx_t i = 0; i < sizeof(uint64_t); i++) { - unsigned_upper <<= 8; - unsigned_upper += input[i]; - } - for (idx_t i = sizeof(uint64_t); i < sizeof(hugeint_t); i++) { - result.lower <<= 8; - result.lower += input[i]; - } - result.upper = unsigned_upper; - result.upper ^= (int64_t(1) << 63); - return result; - } - - static hugeint_t PlainRead(ByteBuffer &plain_data, ColumnReader &reader) { - idx_t byte_len = sizeof(hugeint_t); - plain_data.available(byte_len); - auto res = ReadParquetUUID((const_data_ptr_t)plain_data.ptr); - - plain_data.inc(byte_len); - return res; - } - - static void PlainSkip(ByteBuffer &plain_data, ColumnReader &reader) { - plain_data.inc(sizeof(hugeint_t)); - } -}; - -class UUIDColumnReader : public TemplatedColumnReader { - -public: - UUIDColumnReader(ParquetReader &reader, LogicalType type_p, const SchemaElement &schema_p, idx_t file_idx_p, - idx_t max_define_p, idx_t max_repeat_p) - : TemplatedColumnReader(reader, move(type_p), schema_p, file_idx_p, - max_define_p, max_repeat_p) {}; - -protected: - void Dictionary(shared_ptr dictionary_data, idx_t num_entries) { // NOLINT - this->dict = make_shared(this->reader.allocator, num_entries * sizeof(hugeint_t)); - auto dict_ptr = (hugeint_t *)this->dict->ptr; - for (idx_t i = 0; i < num_entries; i++) { - dict_ptr[i] = UUIDValueConversion::PlainRead(*dictionary_data, *this); - } - } -}; - -//===--------------------------------------------------------------------===// -// Interval Column Reader -//===--------------------------------------------------------------------===// -struct IntervalValueConversion { - static constexpr const idx_t PARQUET_INTERVAL_SIZE = 12; - - static interval_t DictRead(ByteBuffer &dict, uint32_t &offset, ColumnReader &reader) { - auto dict_ptr = (interval_t *)dict.ptr; - return dict_ptr[offset]; - } - - static interval_t ReadParquetInterval(const_data_ptr_t input) { - interval_t result; - result.months = Load(input); - result.days = Load(input + sizeof(uint32_t)); - result.micros = int64_t(Load(input + sizeof(uint32_t) * 2)) * 1000; - return result; - } - - static interval_t PlainRead(ByteBuffer &plain_data, ColumnReader &reader) { - idx_t byte_len = PARQUET_INTERVAL_SIZE; - plain_data.available(byte_len); - auto res = ReadParquetInterval((const_data_ptr_t)plain_data.ptr); - - plain_data.inc(byte_len); - return res; - } - - static void PlainSkip(ByteBuffer &plain_data, ColumnReader &reader) { - plain_data.inc(PARQUET_INTERVAL_SIZE); - } -}; - -class IntervalColumnReader : public TemplatedColumnReader { - -public: - IntervalColumnReader(ParquetReader &reader, LogicalType type_p, const SchemaElement &schema_p, idx_t file_idx_p, - idx_t max_define_p, idx_t max_repeat_p) - : TemplatedColumnReader(reader, move(type_p), schema_p, file_idx_p, - max_define_p, max_repeat_p) {}; - -protected: - void Dictionary(shared_ptr dictionary_data, idx_t num_entries) override { // NOLINT - this->dict = make_shared(this->reader.allocator, num_entries * sizeof(interval_t)); - auto dict_ptr = (interval_t *)this->dict->ptr; - for (idx_t i = 0; i < num_entries; i++) { - dict_ptr[i] = IntervalValueConversion::PlainRead(*dictionary_data, *this); - } - } -}; - -//===--------------------------------------------------------------------===// -// Create Column Reader -//===--------------------------------------------------------------------===// -template -unique_ptr CreateDecimalReader(ParquetReader &reader, const LogicalType &type_p, - const SchemaElement &schema_p, idx_t file_idx_p, idx_t max_define, - idx_t max_repeat) { - switch (type_p.InternalType()) { - case PhysicalType::INT16: - return make_unique>>( - reader, type_p, schema_p, file_idx_p, max_define, max_repeat); - case PhysicalType::INT32: - return make_unique>>( - reader, type_p, schema_p, file_idx_p, max_define, max_repeat); - case PhysicalType::INT64: - return make_unique>>( - reader, type_p, schema_p, file_idx_p, max_define, max_repeat); - default: - throw NotImplementedException("Unimplemented internal type for CreateDecimalReader"); - } -} - -unique_ptr ColumnReader::CreateReader(ParquetReader &reader, const LogicalType &type_p, - const SchemaElement &schema_p, idx_t file_idx_p, idx_t max_define, - idx_t max_repeat) { - switch (type_p.id()) { - case LogicalTypeId::BOOLEAN: - return make_unique(reader, type_p, schema_p, file_idx_p, max_define, max_repeat); - case LogicalTypeId::UTINYINT: - return make_unique>>( - reader, type_p, schema_p, file_idx_p, max_define, max_repeat); - case LogicalTypeId::USMALLINT: - return make_unique>>( - reader, type_p, schema_p, file_idx_p, max_define, max_repeat); - case LogicalTypeId::UINTEGER: - return make_unique>>( - reader, type_p, schema_p, file_idx_p, max_define, max_repeat); - case LogicalTypeId::UBIGINT: - return make_unique>>( - reader, type_p, schema_p, file_idx_p, max_define, max_repeat); - case LogicalTypeId::TINYINT: - return make_unique>>( - reader, type_p, schema_p, file_idx_p, max_define, max_repeat); - case LogicalTypeId::SMALLINT: - return make_unique>>( - reader, type_p, schema_p, file_idx_p, max_define, max_repeat); - case LogicalTypeId::INTEGER: - return make_unique>>( - reader, type_p, schema_p, file_idx_p, max_define, max_repeat); - case LogicalTypeId::BIGINT: - return make_unique>>( - reader, type_p, schema_p, file_idx_p, max_define, max_repeat); - case LogicalTypeId::FLOAT: - return make_unique>>( - reader, type_p, schema_p, file_idx_p, max_define, max_repeat); - case LogicalTypeId::DOUBLE: - return make_unique>>( - reader, type_p, schema_p, file_idx_p, max_define, max_repeat); - case LogicalTypeId::TIMESTAMP: - switch (schema_p.type) { - case Type::INT96: - return make_unique>( - reader, type_p, schema_p, file_idx_p, max_define, max_repeat); - case Type::INT64: - switch (schema_p.converted_type) { - case ConvertedType::TIMESTAMP_MICROS: - return make_unique>( - reader, type_p, schema_p, file_idx_p, max_define, max_repeat); - case ConvertedType::TIMESTAMP_MILLIS: - return make_unique>( - reader, type_p, schema_p, file_idx_p, max_define, max_repeat); - default: - break; - } - default: - break; - } - break; - case LogicalTypeId::DATE: - return make_unique>(reader, type_p, schema_p, - file_idx_p, max_define, max_repeat); - case LogicalTypeId::TIME: - return make_unique>( - reader, type_p, schema_p, file_idx_p, max_define, max_repeat); - case LogicalTypeId::BLOB: - case LogicalTypeId::VARCHAR: - return make_unique(reader, type_p, schema_p, file_idx_p, max_define, max_repeat); - case LogicalTypeId::DECIMAL: - // we have to figure out what kind of int we need - switch (schema_p.type) { - case Type::INT32: - return CreateDecimalReader(reader, type_p, schema_p, file_idx_p, max_define, max_repeat); - case Type::INT64: - return CreateDecimalReader(reader, type_p, schema_p, file_idx_p, max_define, max_repeat); - case Type::BYTE_ARRAY: - case Type::FIXED_LEN_BYTE_ARRAY: - return ParquetDecimalUtils::CreateReader(reader, type_p, schema_p, file_idx_p, max_define, max_repeat); - default: - throw NotImplementedException("Unrecognized Parquet type for Decimal"); - } - break; - case LogicalTypeId::UUID: - return make_unique(reader, type_p, schema_p, file_idx_p, max_define, max_repeat); - case LogicalTypeId::INTERVAL: - return make_unique(reader, type_p, schema_p, file_idx_p, max_define, max_repeat); - default: - break; - } - throw NotImplementedException(type_p.ToString()); -} - -} // namespace duckdb - - - -//===----------------------------------------------------------------------===// -// DuckDB -// -// parquet_rle_bp_encoder.hpp -// -// -//===----------------------------------------------------------------------===// - - - - - - - -namespace duckdb { - -class RleBpEncoder { -public: - RleBpEncoder(uint32_t bit_width); - -public: - //! NOTE: Prepare is only required if a byte count is required BEFORE writing - //! This is the case with e.g. writing repetition/definition levels - //! If GetByteCount() is not required, prepare can be safely skipped - void BeginPrepare(uint32_t first_value); - void PrepareValue(uint32_t value); - void FinishPrepare(); - - void BeginWrite(Serializer &writer, uint32_t first_value); - void WriteValue(Serializer &writer, uint32_t value); - void FinishWrite(Serializer &writer); - - idx_t GetByteCount(); - -private: - //! meta information - uint32_t byte_width; - //! RLE run information - idx_t byte_count; - idx_t run_count; - idx_t current_run_count; - uint32_t last_value; - -private: - void FinishRun(); - void WriteRun(Serializer &writer); -}; - -} // namespace duckdb - - -#include "duckdb.hpp" -#ifndef DUCKDB_AMALGAMATION -#include "duckdb/common/common.hpp" -#include "duckdb/common/exception.hpp" -#include "duckdb/common/mutex.hpp" -#include "duckdb/common/serializer/buffered_file_writer.hpp" -#include "duckdb/common/types/chunk_collection.hpp" -#include "duckdb/common/types/date.hpp" -#include "duckdb/common/types/hugeint.hpp" -#include "duckdb/common/types/time.hpp" -#include "duckdb/common/types/timestamp.hpp" -#include "duckdb/common/serializer/buffered_serializer.hpp" -#include "duckdb/common/operator/comparison_operators.hpp" -#endif - - - - - -namespace duckdb { - -using namespace duckdb_parquet; // NOLINT -using namespace duckdb_miniz; // NOLINT - -using duckdb_parquet::format::CompressionCodec; -using duckdb_parquet::format::ConvertedType; -using duckdb_parquet::format::Encoding; -using duckdb_parquet::format::FieldRepetitionType; -using duckdb_parquet::format::FileMetaData; -using duckdb_parquet::format::PageHeader; -using duckdb_parquet::format::PageType; -using ParquetRowGroup = duckdb_parquet::format::RowGroup; -using duckdb_parquet::format::Type; - -#define PARQUET_DEFINE_VALID 65535 - -static void VarintEncode(uint32_t val, Serializer &ser) { - do { - uint8_t byte = val & 127; - val >>= 7; - if (val != 0) { - byte |= 128; - } - ser.Write(byte); - } while (val != 0); -} - -static uint8_t GetVarintSize(uint32_t val) { - uint8_t res = 0; - do { - val >>= 7; - res++; - } while (val != 0); - return res; -} - -//===--------------------------------------------------------------------===// -// ColumnWriterStatistics -//===--------------------------------------------------------------------===// -ColumnWriterStatistics::~ColumnWriterStatistics() { -} - -string ColumnWriterStatistics::GetMin() { - return string(); -} - -string ColumnWriterStatistics::GetMax() { - return string(); -} - -string ColumnWriterStatistics::GetMinValue() { - return string(); -} - -string ColumnWriterStatistics::GetMaxValue() { - return string(); -} - -//===--------------------------------------------------------------------===// -// RleBpEncoder -//===--------------------------------------------------------------------===// -RleBpEncoder::RleBpEncoder(uint32_t bit_width) - : byte_width((bit_width + 7) / 8), byte_count(idx_t(-1)), run_count(idx_t(-1)) { -} - -// we always RLE everything (for now) -void RleBpEncoder::BeginPrepare(uint32_t first_value) { - byte_count = 0; - run_count = 1; - current_run_count = 1; - last_value = first_value; -} - -void RleBpEncoder::FinishRun() { - // last value, or value has changed - // write out the current run - byte_count += GetVarintSize(current_run_count << 1) + byte_width; - current_run_count = 1; - run_count++; -} - -void RleBpEncoder::PrepareValue(uint32_t value) { - if (value != last_value) { - FinishRun(); - last_value = value; - } else { - current_run_count++; - } -} - -void RleBpEncoder::FinishPrepare() { - FinishRun(); -} - -idx_t RleBpEncoder::GetByteCount() { - D_ASSERT(byte_count != idx_t(-1)); - return byte_count; -} - -void RleBpEncoder::BeginWrite(Serializer &writer, uint32_t first_value) { - // start the RLE runs - last_value = first_value; - current_run_count = 1; -} - -void RleBpEncoder::WriteRun(Serializer &writer) { - // write the header of the run - VarintEncode(current_run_count << 1, writer); - // now write the value - switch (byte_width) { - case 1: - writer.Write(last_value); - break; - case 2: - writer.Write(last_value); - break; - case 3: - writer.Write(last_value & 0xFF); - writer.Write((last_value >> 8) & 0xFF); - writer.Write((last_value >> 16) & 0xFF); - break; - case 4: - writer.Write(last_value); - break; - default: - throw InternalException("unsupported byte width for RLE encoding"); - } - current_run_count = 1; -} - -void RleBpEncoder::WriteValue(Serializer &writer, uint32_t value) { - if (value != last_value) { - WriteRun(writer); - last_value = value; - } else { - current_run_count++; - } -} - -void RleBpEncoder::FinishWrite(Serializer &writer) { - WriteRun(writer); -} - -//===--------------------------------------------------------------------===// -// ColumnWriter -//===--------------------------------------------------------------------===// -ColumnWriter::ColumnWriter(ParquetWriter &writer, idx_t schema_idx, vector schema_path_p, idx_t max_repeat, - idx_t max_define, bool can_have_nulls) - : writer(writer), schema_idx(schema_idx), schema_path(move(schema_path_p)), max_repeat(max_repeat), - max_define(max_define), can_have_nulls(can_have_nulls), null_count(0) { -} -ColumnWriter::~ColumnWriter() { -} - -ColumnWriterState::~ColumnWriterState() { -} - -void ColumnWriter::CompressPage(BufferedSerializer &temp_writer, size_t &compressed_size, data_ptr_t &compressed_data, - unique_ptr &compressed_buf) { - switch (writer.codec) { - case CompressionCodec::UNCOMPRESSED: - compressed_size = temp_writer.blob.size; - compressed_data = temp_writer.blob.data.get(); - break; - case CompressionCodec::SNAPPY: { - compressed_size = duckdb_snappy::MaxCompressedLength(temp_writer.blob.size); - compressed_buf = unique_ptr(new data_t[compressed_size]); - duckdb_snappy::RawCompress((const char *)temp_writer.blob.data.get(), temp_writer.blob.size, - (char *)compressed_buf.get(), &compressed_size); - compressed_data = compressed_buf.get(); - D_ASSERT(compressed_size <= duckdb_snappy::MaxCompressedLength(temp_writer.blob.size)); - break; - } - case CompressionCodec::GZIP: { - MiniZStream s; - compressed_size = s.MaxCompressedLength(temp_writer.blob.size); - compressed_buf = unique_ptr(new data_t[compressed_size]); - s.Compress((const char *)temp_writer.blob.data.get(), temp_writer.blob.size, (char *)compressed_buf.get(), - &compressed_size); - compressed_data = compressed_buf.get(); - break; - } - case CompressionCodec::ZSTD: { - compressed_size = duckdb_zstd::ZSTD_compressBound(temp_writer.blob.size); - compressed_buf = unique_ptr(new data_t[compressed_size]); - compressed_size = duckdb_zstd::ZSTD_compress((void *)compressed_buf.get(), compressed_size, - (const void *)temp_writer.blob.data.get(), temp_writer.blob.size, - ZSTD_CLEVEL_DEFAULT); - compressed_data = compressed_buf.get(); - break; - } - default: - throw InternalException("Unsupported codec for Parquet Writer"); - } - - if (compressed_size > idx_t(NumericLimits::Maximum())) { - throw InternalException("Parquet writer: %d compressed page size out of range for type integer", - temp_writer.blob.size); - } -} - -class ColumnWriterPageState { -public: - virtual ~ColumnWriterPageState() { - } -}; - -struct PageInformation { - idx_t offset = 0; - idx_t row_count = 0; - idx_t empty_count = 0; - idx_t estimated_page_size = 0; -}; - -struct PageWriteInformation { - PageHeader page_header; - unique_ptr temp_writer; - unique_ptr page_state; - idx_t write_page_idx = 0; - idx_t write_count = 0; - idx_t max_write_count = 0; - size_t compressed_size; - data_ptr_t compressed_data; - unique_ptr compressed_buf; -}; - -class StandardColumnWriterState : public ColumnWriterState { -public: - StandardColumnWriterState(duckdb_parquet::format::RowGroup &row_group, idx_t col_idx) - : row_group(row_group), col_idx(col_idx) { - page_info.emplace_back(); - } - ~StandardColumnWriterState() override = default; - - duckdb_parquet::format::RowGroup &row_group; - idx_t col_idx; - vector page_info; - vector write_info; - unique_ptr stats_state; - idx_t current_page = 0; -}; - -unique_ptr ColumnWriter::InitializeWriteState(duckdb_parquet::format::RowGroup &row_group) { - auto result = make_unique(row_group, row_group.columns.size()); - - duckdb_parquet::format::ColumnChunk column_chunk; - column_chunk.__isset.meta_data = true; - column_chunk.meta_data.codec = writer.codec; - column_chunk.meta_data.path_in_schema = schema_path; - column_chunk.meta_data.num_values = 0; - column_chunk.meta_data.type = writer.file_meta_data.schema[schema_idx].type; - row_group.columns.push_back(move(column_chunk)); - - return move(result); -} - -void ColumnWriter::HandleRepeatLevels(ColumnWriterState &state, ColumnWriterState *parent, idx_t count, - idx_t max_repeat) { - if (!parent) { - // no repeat levels without a parent node - return; - } - while (state.repetition_levels.size() < parent->repetition_levels.size()) { - state.repetition_levels.push_back(parent->repetition_levels[state.repetition_levels.size()]); - } -} - -void ColumnWriter::HandleDefineLevels(ColumnWriterState &state, ColumnWriterState *parent, ValidityMask &validity, - idx_t count, uint16_t define_value, uint16_t null_value) { - if (parent) { - // parent node: inherit definition level from the parent - idx_t vector_index = 0; - while (state.definition_levels.size() < parent->definition_levels.size()) { - idx_t current_index = state.definition_levels.size(); - if (parent->definition_levels[current_index] != PARQUET_DEFINE_VALID) { - state.definition_levels.push_back(parent->definition_levels[current_index]); - } else if (validity.RowIsValid(vector_index)) { - state.definition_levels.push_back(define_value); - } else { - if (!can_have_nulls) { - throw IOException("Parquet writer: map key column is not allowed to contain NULL values"); - } - null_count++; - state.definition_levels.push_back(null_value); - } - if (parent->is_empty.empty() || !parent->is_empty[current_index]) { - vector_index++; - } - } - } else { - // no parent: set definition levels only from this validity mask - for (idx_t i = 0; i < count; i++) { - if (validity.RowIsValid(i)) { - state.definition_levels.push_back(define_value); - } else { - if (!can_have_nulls) { - throw IOException("Parquet writer: map key column is not allowed to contain NULL values"); - } - null_count++; - state.definition_levels.push_back(null_value); - } - } - } -} - -void ColumnWriter::Prepare(ColumnWriterState &state_p, ColumnWriterState *parent, Vector &vector, idx_t count) { - auto &state = (StandardColumnWriterState &)state_p; - auto &col_chunk = state.row_group.columns[state.col_idx]; - - idx_t start = 0; - idx_t vcount = parent ? parent->definition_levels.size() - state.definition_levels.size() : count; - idx_t parent_index = state.definition_levels.size(); - auto &validity = FlatVector::Validity(vector); - HandleRepeatLevels(state_p, parent, count, max_repeat); - HandleDefineLevels(state_p, parent, validity, count, max_define, max_define - 1); - - idx_t vector_index = 0; - for (idx_t i = start; i < vcount; i++) { - auto &page_info = state.page_info.back(); - page_info.row_count++; - col_chunk.meta_data.num_values++; - if (parent && !parent->is_empty.empty() && parent->is_empty[parent_index + i]) { - page_info.empty_count++; - continue; - } - if (validity.RowIsValid(vector_index)) { - page_info.estimated_page_size += GetRowSize(vector, vector_index); - if (page_info.estimated_page_size >= MAX_UNCOMPRESSED_PAGE_SIZE) { - PageInformation new_info; - new_info.offset = page_info.offset + page_info.row_count; - state.page_info.push_back(new_info); - } - } - vector_index++; - } -} - -unique_ptr ColumnWriter::InitializePageState() { - return nullptr; -} - -void ColumnWriter::FlushPageState(Serializer &temp_writer, ColumnWriterPageState *state) { -} - -duckdb_parquet::format::Encoding::type ColumnWriter::GetEncoding() { - return Encoding::PLAIN; -} - -void ColumnWriter::BeginWrite(ColumnWriterState &state_p) { - auto &state = (StandardColumnWriterState &)state_p; - - // set up the page write info - state.stats_state = InitializeStatsState(); - for (idx_t page_idx = 0; page_idx < state.page_info.size(); page_idx++) { - auto &page_info = state.page_info[page_idx]; - if (page_info.row_count == 0) { - D_ASSERT(page_idx + 1 == state.page_info.size()); - state.page_info.erase(state.page_info.begin() + page_idx); - break; - } - PageWriteInformation write_info; - // set up the header - auto &hdr = write_info.page_header; - hdr.compressed_page_size = 0; - hdr.uncompressed_page_size = 0; - hdr.type = PageType::DATA_PAGE; - hdr.__isset.data_page_header = true; - - hdr.data_page_header.num_values = page_info.row_count; - hdr.data_page_header.encoding = GetEncoding(); - hdr.data_page_header.definition_level_encoding = Encoding::RLE; - hdr.data_page_header.repetition_level_encoding = Encoding::RLE; - - write_info.temp_writer = make_unique(); - write_info.write_count = page_info.empty_count; - write_info.max_write_count = page_info.row_count; - write_info.page_state = InitializePageState(); - - write_info.compressed_size = 0; - write_info.compressed_data = nullptr; - - state.write_info.push_back(move(write_info)); - } - - // start writing the first page - NextPage(state_p); -} - -void ColumnWriter::WriteLevels(Serializer &temp_writer, const vector &levels, idx_t max_value, idx_t offset, - idx_t count) { - if (levels.empty() || count == 0) { - return; - } - - // write the levels using the RLE-BP encoding - auto bit_width = RleBpDecoder::ComputeBitWidth((max_value)); - RleBpEncoder rle_encoder(bit_width); - - rle_encoder.BeginPrepare(levels[offset]); - for (idx_t i = offset + 1; i < offset + count; i++) { - rle_encoder.PrepareValue(levels[i]); - } - rle_encoder.FinishPrepare(); - - // start off by writing the byte count as a uint32_t - temp_writer.Write(rle_encoder.GetByteCount()); - rle_encoder.BeginWrite(temp_writer, levels[offset]); - for (idx_t i = offset + 1; i < offset + count; i++) { - rle_encoder.WriteValue(temp_writer, levels[i]); - } - rle_encoder.FinishWrite(temp_writer); -} - -void ColumnWriter::NextPage(ColumnWriterState &state_p) { - auto &state = (StandardColumnWriterState &)state_p; - - if (state.current_page > 0) { - // need to flush the current page - FlushPage(state_p); - } - if (state.current_page >= state.write_info.size()) { - state.current_page = state.write_info.size() + 1; - return; - } - auto &page_info = state.page_info[state.current_page]; - auto &write_info = state.write_info[state.current_page]; - state.current_page++; - - auto &temp_writer = *write_info.temp_writer; - - // write the repetition levels - WriteLevels(temp_writer, state.repetition_levels, max_repeat, page_info.offset, page_info.row_count); - - // write the definition levels - WriteLevels(temp_writer, state.definition_levels, max_define, page_info.offset, page_info.row_count); -} - -void ColumnWriter::FlushPage(ColumnWriterState &state_p) { - auto &state = (StandardColumnWriterState &)state_p; - D_ASSERT(state.current_page > 0); - if (state.current_page > state.write_info.size()) { - return; - } - - // compress the page info - auto &write_info = state.write_info[state.current_page - 1]; - auto &temp_writer = *write_info.temp_writer; - auto &hdr = write_info.page_header; - - FlushPageState(temp_writer, write_info.page_state.get()); - - // now that we have finished writing the data we know the uncompressed size - if (temp_writer.blob.size > idx_t(NumericLimits::Maximum())) { - throw InternalException("Parquet writer: %d uncompressed page size out of range for type integer", - temp_writer.blob.size); - } - hdr.uncompressed_page_size = temp_writer.blob.size; - - // compress the data - CompressPage(temp_writer, write_info.compressed_size, write_info.compressed_data, write_info.compressed_buf); - hdr.compressed_page_size = write_info.compressed_size; - D_ASSERT(hdr.uncompressed_page_size > 0); - D_ASSERT(hdr.compressed_page_size > 0); - - if (write_info.compressed_buf) { - // if the data has been compressed, we no longer need the compressed data - D_ASSERT(write_info.compressed_buf.get() == write_info.compressed_data); - write_info.temp_writer.reset(); - } -} - -unique_ptr ColumnWriter::InitializeStatsState() { - return make_unique(); -} - -void ColumnWriter::WriteVector(Serializer &temp_writer, ColumnWriterStatistics *stats, - ColumnWriterPageState *page_state, Vector &input_column, idx_t chunk_start, - idx_t chunk_end) { - throw InternalException("WriteVector unsupported for struct/list column writers"); -} - -idx_t ColumnWriter::GetRowSize(Vector &vector, idx_t index) { - throw InternalException("GetRowSize unsupported for struct/list column writers"); -} - -void ColumnWriter::Write(ColumnWriterState &state_p, Vector &vector, idx_t count) { - auto &state = (StandardColumnWriterState &)state_p; - - idx_t remaining = count; - idx_t offset = 0; - while (remaining > 0) { - auto &write_info = state.write_info[state.current_page - 1]; - if (!write_info.temp_writer) { - throw InternalException("Writes are not correctly aligned!?"); - } - auto &temp_writer = *write_info.temp_writer; - idx_t write_count = MinValue(remaining, write_info.max_write_count - write_info.write_count); - D_ASSERT(write_count > 0); - - WriteVector(temp_writer, state.stats_state.get(), write_info.page_state.get(), vector, offset, - offset + write_count); - - write_info.write_count += write_count; - if (write_info.write_count == write_info.max_write_count) { - NextPage(state_p); - } - offset += write_count; - remaining -= write_count; - } -} - -void ColumnWriter::SetParquetStatistics(StandardColumnWriterState &state, - duckdb_parquet::format::ColumnChunk &column_chunk) { - if (max_repeat == 0) { - column_chunk.meta_data.statistics.null_count = null_count; - column_chunk.meta_data.statistics.__isset.null_count = true; - column_chunk.meta_data.__isset.statistics = true; - } - // set min/max/min_value/max_value - // this code is not going to win any beauty contests, but well - auto min = state.stats_state->GetMin(); - if (!min.empty()) { - column_chunk.meta_data.statistics.min = move(min); - column_chunk.meta_data.statistics.__isset.min = true; - column_chunk.meta_data.__isset.statistics = true; - } - auto max = state.stats_state->GetMax(); - if (!max.empty()) { - column_chunk.meta_data.statistics.max = move(max); - column_chunk.meta_data.statistics.__isset.max = true; - column_chunk.meta_data.__isset.statistics = true; - } - auto min_value = state.stats_state->GetMinValue(); - if (!min_value.empty()) { - column_chunk.meta_data.statistics.min_value = move(min_value); - column_chunk.meta_data.statistics.__isset.min_value = true; - column_chunk.meta_data.__isset.statistics = true; - } - auto max_value = state.stats_state->GetMaxValue(); - if (!max_value.empty()) { - column_chunk.meta_data.statistics.max_value = move(max_value); - column_chunk.meta_data.statistics.__isset.max_value = true; - column_chunk.meta_data.__isset.statistics = true; - } -} - -void ColumnWriter::FinalizeWrite(ColumnWriterState &state_p) { - auto &state = (StandardColumnWriterState &)state_p; - auto &column_chunk = state.row_group.columns[state.col_idx]; - - // flush the last page (if any remains) - FlushPage(state); - // flush the dictionary - FlushDictionary(state, state.stats_state.get()); - - // record the start position of the pages for this column - column_chunk.meta_data.data_page_offset = writer.writer->GetTotalWritten(); - SetParquetStatistics(state, column_chunk); - - // write the individual pages to disk - for (auto &write_info : state.write_info) { - D_ASSERT(write_info.page_header.uncompressed_page_size > 0); - write_info.page_header.write(writer.protocol.get()); - writer.writer->WriteData(write_info.compressed_data, write_info.compressed_size); - } - column_chunk.meta_data.total_compressed_size = - writer.writer->GetTotalWritten() - column_chunk.meta_data.data_page_offset; -} - -void ColumnWriter::WriteDictionary(ColumnWriterState &state_p, unique_ptr temp_writer, - idx_t row_count) { - auto &state = (StandardColumnWriterState &)state_p; - D_ASSERT(temp_writer); - D_ASSERT(temp_writer->blob.size > 0); - - // write the dictionary page header - PageWriteInformation write_info; - // set up the header - auto &hdr = write_info.page_header; - hdr.uncompressed_page_size = temp_writer->blob.size; - hdr.type = PageType::DICTIONARY_PAGE; - hdr.__isset.dictionary_page_header = true; - - hdr.dictionary_page_header.encoding = Encoding::PLAIN; - hdr.dictionary_page_header.is_sorted = false; - hdr.dictionary_page_header.num_values = row_count; - - write_info.temp_writer = move(temp_writer); - write_info.write_count = 0; - write_info.max_write_count = 0; - - // compress the contents of the dictionary page - CompressPage(*write_info.temp_writer, write_info.compressed_size, write_info.compressed_data, - write_info.compressed_buf); - hdr.compressed_page_size = write_info.compressed_size; - - // insert the dictionary page as the first page to write for this column - state.write_info.insert(state.write_info.begin(), move(write_info)); -} - -void ColumnWriter::FlushDictionary(ColumnWriterState &state, ColumnWriterStatistics *stats) { - // nop: standard pages do not have a dictionary -} - -//===--------------------------------------------------------------------===// -// Standard Column Writer -//===--------------------------------------------------------------------===// -template -class NumericStatisticsState : public ColumnWriterStatistics { -public: - NumericStatisticsState() : min(NumericLimits::Maximum()), max(NumericLimits::Minimum()) { - } - - T min; - T max; - -public: - bool HasStats() { - return min <= max; - } - - string GetMin() override { - return NumericLimits::IsSigned() ? GetMinValue() : string(); - } - string GetMax() override { - return NumericLimits::IsSigned() ? GetMaxValue() : string(); - } - string GetMinValue() override { - return HasStats() ? string((char *)&min, sizeof(T)) : string(); - } - string GetMaxValue() override { - return HasStats() ? string((char *)&max, sizeof(T)) : string(); - } -}; - -struct BaseParquetOperator { - template - static unique_ptr InitializeStats() { - return make_unique>(); - } - - template - static void HandleStats(ColumnWriterStatistics *stats, SRC source_value, TGT target_value) { - auto &numeric_stats = (NumericStatisticsState &)*stats; - if (LessThan::Operation(target_value, numeric_stats.min)) { - numeric_stats.min = target_value; - } - if (GreaterThan::Operation(target_value, numeric_stats.max)) { - numeric_stats.max = target_value; - } - } -}; - -struct ParquetCastOperator : public BaseParquetOperator { - template - static TGT Operation(SRC input) { - return TGT(input); - } -}; - -struct ParquetTimestampNSOperator : public BaseParquetOperator { - template - static TGT Operation(SRC input) { - return Timestamp::FromEpochNanoSeconds(input).value; - } -}; - -struct ParquetTimestampSOperator : public BaseParquetOperator { - template - static TGT Operation(SRC input) { - return Timestamp::FromEpochSeconds(input).value; - } -}; - -struct ParquetHugeintOperator { - template - static TGT Operation(SRC input) { - return Hugeint::Cast(input); - } - - template - static unique_ptr InitializeStats() { - return make_unique(); - } - - template - static void HandleStats(ColumnWriterStatistics *stats, SRC source_value, TGT target_value) { - } -}; - -template -static void TemplatedWritePlain(Vector &col, ColumnWriterStatistics *stats, idx_t chunk_start, idx_t chunk_end, - ValidityMask &mask, Serializer &ser) { - auto *ptr = FlatVector::GetData(col); - for (idx_t r = chunk_start; r < chunk_end; r++) { - if (mask.RowIsValid(r)) { - TGT target_value = OP::template Operation(ptr[r]); - OP::template HandleStats(stats, ptr[r], target_value); - ser.Write(target_value); - } - } -} - -template -class StandardColumnWriter : public ColumnWriter { -public: - StandardColumnWriter(ParquetWriter &writer, idx_t schema_idx, vector schema_path_p, // NOLINT - idx_t max_repeat, idx_t max_define, bool can_have_nulls) - : ColumnWriter(writer, schema_idx, move(schema_path_p), max_repeat, max_define, can_have_nulls) { - } - ~StandardColumnWriter() override = default; - -public: - unique_ptr InitializeStatsState() override { - return OP::template InitializeStats(); - } - - void WriteVector(Serializer &temp_writer, ColumnWriterStatistics *stats, ColumnWriterPageState *page_state, - Vector &input_column, idx_t chunk_start, idx_t chunk_end) override { - auto &mask = FlatVector::Validity(input_column); - TemplatedWritePlain(input_column, stats, chunk_start, chunk_end, mask, temp_writer); - } - - idx_t GetRowSize(Vector &vector, idx_t index) override { - return sizeof(TGT); - } -}; - -//===--------------------------------------------------------------------===// -// Boolean Column Writer -//===--------------------------------------------------------------------===// -class BooleanStatisticsState : public ColumnWriterStatistics { -public: - BooleanStatisticsState() : min(true), max(false) { - } - - bool min; - bool max; - -public: - bool HasStats() { - return !(min && !max); - } - - string GetMin() override { - return GetMinValue(); - } - string GetMax() override { - return GetMaxValue(); - } - string GetMinValue() override { - return HasStats() ? string((char *)&min, sizeof(bool)) : string(); - } - string GetMaxValue() override { - return HasStats() ? string((char *)&max, sizeof(bool)) : string(); - } -}; - -class BooleanWriterPageState : public ColumnWriterPageState { -public: - uint8_t byte = 0; - uint8_t byte_pos = 0; -}; - -class BooleanColumnWriter : public ColumnWriter { -public: - BooleanColumnWriter(ParquetWriter &writer, idx_t schema_idx, vector schema_path_p, idx_t max_repeat, - idx_t max_define, bool can_have_nulls) - : ColumnWriter(writer, schema_idx, move(schema_path_p), max_repeat, max_define, can_have_nulls) { - } - ~BooleanColumnWriter() override = default; - -public: - unique_ptr InitializeStatsState() override { - return make_unique(); - } - - void WriteVector(Serializer &temp_writer, ColumnWriterStatistics *stats_p, ColumnWriterPageState *state_p, - Vector &input_column, idx_t chunk_start, idx_t chunk_end) override { - auto &stats = (BooleanStatisticsState &)*stats_p; - auto &state = (BooleanWriterPageState &)*state_p; - auto &mask = FlatVector::Validity(input_column); - - auto *ptr = FlatVector::GetData(input_column); - for (idx_t r = chunk_start; r < chunk_end; r++) { - if (mask.RowIsValid(r)) { - // only encode if non-null - if (ptr[r]) { - stats.max = true; - state.byte |= 1 << state.byte_pos; - } else { - stats.min = false; - } - state.byte_pos++; - - if (state.byte_pos == 8) { - temp_writer.Write(state.byte); - state.byte = 0; - state.byte_pos = 0; - } - } - } - } - - unique_ptr InitializePageState() override { - return make_unique(); - } - - void FlushPageState(Serializer &temp_writer, ColumnWriterPageState *state_p) override { - auto &state = (BooleanWriterPageState &)*state_p; - if (state.byte_pos > 0) { - temp_writer.Write(state.byte); - state.byte = 0; - state.byte_pos = 0; - } - } - - idx_t GetRowSize(Vector &vector, idx_t index) override { - return sizeof(bool); - } -}; - -//===--------------------------------------------------------------------===// -// Decimal Column Writer -//===--------------------------------------------------------------------===// -static void WriteParquetDecimal(hugeint_t input, data_ptr_t result) { - bool positive = input >= 0; - // numbers are stored as two's complement so some muckery is required - if (!positive) { - input = NumericLimits::Maximum() + input + 1; - } - uint64_t high_bytes = uint64_t(input.upper); - uint64_t low_bytes = input.lower; - - for (idx_t i = 0; i < sizeof(uint64_t); i++) { - auto shift_count = (sizeof(uint64_t) - i - 1) * 8; - result[i] = (high_bytes >> shift_count) & 0xFF; - } - for (idx_t i = 0; i < sizeof(uint64_t); i++) { - auto shift_count = (sizeof(uint64_t) - i - 1) * 8; - result[sizeof(uint64_t) + i] = (low_bytes >> shift_count) & 0xFF; - } - if (!positive) { - result[0] |= 0x80; - } -} - -class FixedDecimalStatistics : public ColumnWriterStatistics { -public: - FixedDecimalStatistics() : min(NumericLimits::Maximum()), max(NumericLimits::Minimum()) { - } - - hugeint_t min; - hugeint_t max; - -public: - string GetStats(hugeint_t &input) { - data_t buffer[16]; - WriteParquetDecimal(input, buffer); - return string((char *)buffer, 16); - } - - bool HasStats() { - return min <= max; - } - - void Update(hugeint_t &val) { - if (LessThan::Operation(val, min)) { - min = val; - } - if (GreaterThan::Operation(val, max)) { - max = val; - } - } - - string GetMin() override { - return GetMinValue(); - } - string GetMax() override { - return GetMaxValue(); - } - string GetMinValue() override { - return HasStats() ? GetStats(min) : string(); - } - string GetMaxValue() override { - return HasStats() ? GetStats(max) : string(); - } -}; - -class FixedDecimalColumnWriter : public ColumnWriter { -public: - FixedDecimalColumnWriter(ParquetWriter &writer, idx_t schema_idx, vector schema_path_p, idx_t max_repeat, - idx_t max_define, bool can_have_nulls) - : ColumnWriter(writer, schema_idx, move(schema_path_p), max_repeat, max_define, can_have_nulls) { - } - ~FixedDecimalColumnWriter() override = default; - -public: - unique_ptr InitializeStatsState() override { - return make_unique(); - } - - void WriteVector(Serializer &temp_writer, ColumnWriterStatistics *stats_p, ColumnWriterPageState *page_state, - Vector &input_column, idx_t chunk_start, idx_t chunk_end) override { - auto &mask = FlatVector::Validity(input_column); - auto *ptr = FlatVector::GetData(input_column); - auto &stats = (FixedDecimalStatistics &)*stats_p; - - data_t temp_buffer[16]; - for (idx_t r = chunk_start; r < chunk_end; r++) { - if (mask.RowIsValid(r)) { - stats.Update(ptr[r]); - WriteParquetDecimal(ptr[r], temp_buffer); - temp_writer.WriteData(temp_buffer, 16); - } - } - } - - idx_t GetRowSize(Vector &vector, idx_t index) override { - return sizeof(hugeint_t); - } -}; - -//===--------------------------------------------------------------------===// -// UUID Column Writer -//===--------------------------------------------------------------------===// -class UUIDColumnWriter : public ColumnWriter { - static constexpr const idx_t PARQUET_UUID_SIZE = 16; - -public: - UUIDColumnWriter(ParquetWriter &writer, idx_t schema_idx, vector schema_path_p, idx_t max_repeat, - idx_t max_define, bool can_have_nulls) - : ColumnWriter(writer, schema_idx, move(schema_path_p), max_repeat, max_define, can_have_nulls) { - } - ~UUIDColumnWriter() override = default; - -public: - static void WriteParquetUUID(hugeint_t input, data_ptr_t result) { - uint64_t high_bytes = input.upper ^ (int64_t(1) << 63); - uint64_t low_bytes = input.lower; - - for (idx_t i = 0; i < sizeof(uint64_t); i++) { - auto shift_count = (sizeof(uint64_t) - i - 1) * 8; - result[i] = (high_bytes >> shift_count) & 0xFF; - } - for (idx_t i = 0; i < sizeof(uint64_t); i++) { - auto shift_count = (sizeof(uint64_t) - i - 1) * 8; - result[sizeof(uint64_t) + i] = (low_bytes >> shift_count) & 0xFF; - } - } - - void WriteVector(Serializer &temp_writer, ColumnWriterStatistics *stats_p, ColumnWriterPageState *page_state, - Vector &input_column, idx_t chunk_start, idx_t chunk_end) override { - auto &mask = FlatVector::Validity(input_column); - auto *ptr = FlatVector::GetData(input_column); - - data_t temp_buffer[PARQUET_UUID_SIZE]; - for (idx_t r = chunk_start; r < chunk_end; r++) { - if (mask.RowIsValid(r)) { - WriteParquetUUID(ptr[r], temp_buffer); - temp_writer.WriteData(temp_buffer, PARQUET_UUID_SIZE); - } - } - } - - idx_t GetRowSize(Vector &vector, idx_t index) override { - return PARQUET_UUID_SIZE; - } -}; - -//===--------------------------------------------------------------------===// -// Interval Column Writer -//===--------------------------------------------------------------------===// -class IntervalColumnWriter : public ColumnWriter { - static constexpr const idx_t PARQUET_INTERVAL_SIZE = 12; - -public: - IntervalColumnWriter(ParquetWriter &writer, idx_t schema_idx, vector schema_path_p, idx_t max_repeat, - idx_t max_define, bool can_have_nulls) - : ColumnWriter(writer, schema_idx, move(schema_path_p), max_repeat, max_define, can_have_nulls) { - } - ~IntervalColumnWriter() override = default; - -public: - static void WriteParquetInterval(interval_t input, data_ptr_t result) { - if (input.days < 0 || input.months < 0 || input.micros < 0) { - throw IOException("Parquet files do not support negative intervals"); - } - Store(input.months, result); - Store(input.days, result + sizeof(uint32_t)); - Store(input.micros / 1000, result + sizeof(uint32_t) * 2); - } - - void WriteVector(Serializer &temp_writer, ColumnWriterStatistics *stats_p, ColumnWriterPageState *page_state, - Vector &input_column, idx_t chunk_start, idx_t chunk_end) override { - auto &mask = FlatVector::Validity(input_column); - auto *ptr = FlatVector::GetData(input_column); - - data_t temp_buffer[PARQUET_INTERVAL_SIZE]; - for (idx_t r = chunk_start; r < chunk_end; r++) { - if (mask.RowIsValid(r)) { - WriteParquetInterval(ptr[r], temp_buffer); - temp_writer.WriteData(temp_buffer, PARQUET_INTERVAL_SIZE); - } - } - } - - idx_t GetRowSize(Vector &vector, idx_t index) override { - return PARQUET_INTERVAL_SIZE; - } -}; - -//===--------------------------------------------------------------------===// -// String Column Writer -//===--------------------------------------------------------------------===// -class StringStatisticsState : public ColumnWriterStatistics { - static constexpr const idx_t MAX_STRING_STATISTICS_SIZE = 10000; - -public: - StringStatisticsState() : has_stats(false), values_too_big(false), min(), max() { - } - - bool has_stats; - bool values_too_big; - string min; - string max; - -public: - bool HasStats() { - return has_stats; - } - - void Update(const string_t &val) { - if (values_too_big) { - return; - } - auto str_len = val.GetSize(); - if (str_len > MAX_STRING_STATISTICS_SIZE) { - // we avoid gathering stats when individual string values are too large - // this is because the statistics are copied into the Parquet file meta data in uncompressed format - // ideally we avoid placing several mega or giga-byte long strings there - // we put a threshold of 10KB, if we see strings that exceed this threshold we avoid gathering stats - values_too_big = true; - min = string(); - max = string(); - return; - } - if (!has_stats || LessThan::Operation(val, string_t(min))) { - min = val.GetString(); - } - if (!has_stats || GreaterThan::Operation(val, string_t(max))) { - max = val.GetString(); - } - has_stats = true; - } - - string GetMin() override { - return GetMinValue(); - } - string GetMax() override { - return GetMaxValue(); - } - string GetMinValue() override { - return HasStats() ? min : string(); - } - string GetMaxValue() override { - return HasStats() ? max : string(); - } -}; - -class StringColumnWriter : public ColumnWriter { -public: - StringColumnWriter(ParquetWriter &writer, idx_t schema_idx, vector schema_path_p, idx_t max_repeat, - idx_t max_define, bool can_have_nulls) - : ColumnWriter(writer, schema_idx, move(schema_path_p), max_repeat, max_define, can_have_nulls) { - } - ~StringColumnWriter() override = default; - -public: - unique_ptr InitializeStatsState() override { - return make_unique(); - } - - void WriteVector(Serializer &temp_writer, ColumnWriterStatistics *stats_p, ColumnWriterPageState *page_state, - Vector &input_column, idx_t chunk_start, idx_t chunk_end) override { - auto &mask = FlatVector::Validity(input_column); - auto &stats = (StringStatisticsState &)*stats_p; - - auto *ptr = FlatVector::GetData(input_column); - for (idx_t r = chunk_start; r < chunk_end; r++) { - if (mask.RowIsValid(r)) { - stats.Update(ptr[r]); - temp_writer.Write(ptr[r].GetSize()); - temp_writer.WriteData((const_data_ptr_t)ptr[r].GetDataUnsafe(), ptr[r].GetSize()); - } - } - } - - idx_t GetRowSize(Vector &vector, idx_t index) override { - auto strings = FlatVector::GetData(vector); - return strings[index].GetSize(); - } -}; - -//===--------------------------------------------------------------------===// -// Enum Column Writer -//===--------------------------------------------------------------------===// -class EnumWriterPageState : public ColumnWriterPageState { -public: - explicit EnumWriterPageState(uint32_t bit_width) : encoder(bit_width), written_value(false) { - } - - RleBpEncoder encoder; - bool written_value; -}; - -class EnumColumnWriter : public ColumnWriter { -public: - EnumColumnWriter(ParquetWriter &writer, LogicalType enum_type_p, idx_t schema_idx, vector schema_path_p, - idx_t max_repeat, idx_t max_define, bool can_have_nulls) - : ColumnWriter(writer, schema_idx, move(schema_path_p), max_repeat, max_define, can_have_nulls), - enum_type(move(enum_type_p)) { - bit_width = RleBpDecoder::ComputeBitWidth(EnumType::GetSize(enum_type)); - } - ~EnumColumnWriter() override = default; - - LogicalType enum_type; - uint32_t bit_width; - -public: - unique_ptr InitializeStatsState() override { - return make_unique(); - } - - template - void WriteEnumInternal(Serializer &temp_writer, Vector &input_column, idx_t chunk_start, idx_t chunk_end, - EnumWriterPageState &page_state) { - auto &mask = FlatVector::Validity(input_column); - auto *ptr = FlatVector::GetData(input_column); - for (idx_t r = chunk_start; r < chunk_end; r++) { - if (mask.RowIsValid(r)) { - if (!page_state.written_value) { - // first value - // write the bit-width as a one-byte entry - temp_writer.Write(bit_width); - // now begin writing the actual value - page_state.encoder.BeginWrite(temp_writer, ptr[r]); - page_state.written_value = true; - } else { - page_state.encoder.WriteValue(temp_writer, ptr[r]); - } - } - } - } - - void WriteVector(Serializer &temp_writer, ColumnWriterStatistics *stats_p, ColumnWriterPageState *page_state_p, - Vector &input_column, idx_t chunk_start, idx_t chunk_end) override { - auto &page_state = (EnumWriterPageState &)*page_state_p; - switch (enum_type.InternalType()) { - case PhysicalType::UINT8: - WriteEnumInternal(temp_writer, input_column, chunk_start, chunk_end, page_state); - break; - case PhysicalType::UINT16: - WriteEnumInternal(temp_writer, input_column, chunk_start, chunk_end, page_state); - break; - case PhysicalType::UINT32: - WriteEnumInternal(temp_writer, input_column, chunk_start, chunk_end, page_state); - break; - default: - throw InternalException("Unsupported internal enum type"); - } - } - - unique_ptr InitializePageState() override { - return make_unique(bit_width); - } - - void FlushPageState(Serializer &temp_writer, ColumnWriterPageState *state_p) override { - auto &page_state = (EnumWriterPageState &)*state_p; - if (!page_state.written_value) { - // all values are null - // just write the bit width - temp_writer.Write(bit_width); - return; - } - page_state.encoder.FinishWrite(temp_writer); - } - - duckdb_parquet::format::Encoding::type GetEncoding() override { - return Encoding::RLE_DICTIONARY; - } - - void FlushDictionary(ColumnWriterState &state, ColumnWriterStatistics *stats_p) override { - auto &stats = (StringStatisticsState &)*stats_p; - // write the enum values to a dictionary page - auto &enum_values = EnumType::GetValuesInsertOrder(enum_type); - auto enum_count = EnumType::GetSize(enum_type); - auto string_values = FlatVector::GetData(enum_values); - // first write the contents of the dictionary page to a temporary buffer - auto temp_writer = make_unique(); - for (idx_t r = 0; r < enum_count; r++) { - D_ASSERT(!FlatVector::IsNull(enum_values, r)); - // update the statistics - stats.Update(string_values[r]); - // write this string value to the dictionary - temp_writer->Write(string_values[r].GetSize()); - temp_writer->WriteData((const_data_ptr_t)string_values[r].GetDataUnsafe(), string_values[r].GetSize()); - } - // flush the dictionary page and add it to the to-be-written pages - WriteDictionary(state, move(temp_writer), enum_count); - } - - idx_t GetRowSize(Vector &vector, idx_t index) override { - return (bit_width + 7) / 8; - } -}; - -//===--------------------------------------------------------------------===// -// Struct Column Writer -//===--------------------------------------------------------------------===// -class StructColumnWriter : public ColumnWriter { -public: - StructColumnWriter(ParquetWriter &writer, idx_t schema_idx, vector schema_path_p, idx_t max_repeat, - idx_t max_define, vector> child_writers_p, bool can_have_nulls) - : ColumnWriter(writer, schema_idx, move(schema_path_p), max_repeat, max_define, can_have_nulls), - child_writers(move(child_writers_p)) { - } - ~StructColumnWriter() override = default; - - vector> child_writers; - -public: - unique_ptr InitializeWriteState(duckdb_parquet::format::RowGroup &row_group) override; - void Prepare(ColumnWriterState &state, ColumnWriterState *parent, Vector &vector, idx_t count) override; - - void BeginWrite(ColumnWriterState &state) override; - void Write(ColumnWriterState &state, Vector &vector, idx_t count) override; - void FinalizeWrite(ColumnWriterState &state) override; -}; - -class StructColumnWriterState : public ColumnWriterState { -public: - StructColumnWriterState(duckdb_parquet::format::RowGroup &row_group, idx_t col_idx) - : row_group(row_group), col_idx(col_idx) { - } - ~StructColumnWriterState() override = default; - - duckdb_parquet::format::RowGroup &row_group; - idx_t col_idx; - vector> child_states; -}; - -unique_ptr StructColumnWriter::InitializeWriteState(duckdb_parquet::format::RowGroup &row_group) { - auto result = make_unique(row_group, row_group.columns.size()); - - result->child_states.reserve(child_writers.size()); - for (auto &child_writer : child_writers) { - result->child_states.push_back(child_writer->InitializeWriteState(row_group)); - } - return move(result); -} - -void StructColumnWriter::Prepare(ColumnWriterState &state_p, ColumnWriterState *parent, Vector &vector, idx_t count) { - auto &state = (StructColumnWriterState &)state_p; - - auto &validity = FlatVector::Validity(vector); - if (parent) { - // propagate empty entries from the parent - while (state.is_empty.size() < parent->is_empty.size()) { - state.is_empty.push_back(parent->is_empty[state.is_empty.size()]); - } - } - HandleRepeatLevels(state_p, parent, count, max_repeat); - HandleDefineLevels(state_p, parent, validity, count, PARQUET_DEFINE_VALID, max_define - 1); - auto &child_vectors = StructVector::GetEntries(vector); - for (idx_t child_idx = 0; child_idx < child_writers.size(); child_idx++) { - child_writers[child_idx]->Prepare(*state.child_states[child_idx], &state_p, *child_vectors[child_idx], count); - } -} - -void StructColumnWriter::BeginWrite(ColumnWriterState &state_p) { - auto &state = (StructColumnWriterState &)state_p; - for (idx_t child_idx = 0; child_idx < child_writers.size(); child_idx++) { - child_writers[child_idx]->BeginWrite(*state.child_states[child_idx]); - } -} - -void StructColumnWriter::Write(ColumnWriterState &state_p, Vector &vector, idx_t count) { - auto &state = (StructColumnWriterState &)state_p; - auto &child_vectors = StructVector::GetEntries(vector); - for (idx_t child_idx = 0; child_idx < child_writers.size(); child_idx++) { - child_writers[child_idx]->Write(*state.child_states[child_idx], *child_vectors[child_idx], count); - } -} - -void StructColumnWriter::FinalizeWrite(ColumnWriterState &state_p) { - auto &state = (StructColumnWriterState &)state_p; - for (idx_t child_idx = 0; child_idx < child_writers.size(); child_idx++) { - // we add the null count of the struct to the null count of the children - child_writers[child_idx]->null_count += null_count; - child_writers[child_idx]->FinalizeWrite(*state.child_states[child_idx]); - } -} - -//===--------------------------------------------------------------------===// -// List Column Writer -//===--------------------------------------------------------------------===// -class ListColumnWriter : public ColumnWriter { -public: - ListColumnWriter(ParquetWriter &writer, idx_t schema_idx, vector schema_path_p, idx_t max_repeat, - idx_t max_define, unique_ptr child_writer_p, bool can_have_nulls) - : ColumnWriter(writer, schema_idx, move(schema_path_p), max_repeat, max_define, can_have_nulls), - child_writer(move(child_writer_p)) { - } - ~ListColumnWriter() override = default; - - unique_ptr child_writer; - -public: - unique_ptr InitializeWriteState(duckdb_parquet::format::RowGroup &row_group) override; - void Prepare(ColumnWriterState &state, ColumnWriterState *parent, Vector &vector, idx_t count) override; - - void BeginWrite(ColumnWriterState &state) override; - void Write(ColumnWriterState &state, Vector &vector, idx_t count) override; - void FinalizeWrite(ColumnWriterState &state) override; -}; - -class ListColumnWriterState : public ColumnWriterState { -public: - ListColumnWriterState(duckdb_parquet::format::RowGroup &row_group, idx_t col_idx) - : row_group(row_group), col_idx(col_idx) { - } - ~ListColumnWriterState() override = default; - - duckdb_parquet::format::RowGroup &row_group; - idx_t col_idx; - unique_ptr child_state; - idx_t parent_index = 0; -}; - -unique_ptr ListColumnWriter::InitializeWriteState(duckdb_parquet::format::RowGroup &row_group) { - auto result = make_unique(row_group, row_group.columns.size()); - result->child_state = child_writer->InitializeWriteState(row_group); - return move(result); -} - -void ListColumnWriter::Prepare(ColumnWriterState &state_p, ColumnWriterState *parent, Vector &vector, idx_t count) { - auto &state = (ListColumnWriterState &)state_p; - - auto list_data = FlatVector::GetData(vector); - auto &validity = FlatVector::Validity(vector); - - // write definition levels and repeats - idx_t start = 0; - idx_t vcount = parent ? parent->definition_levels.size() - state.parent_index : count; - idx_t vector_index = 0; - for (idx_t i = start; i < vcount; i++) { - idx_t parent_index = state.parent_index + i; - if (parent && !parent->is_empty.empty() && parent->is_empty[parent_index]) { - state.definition_levels.push_back(parent->definition_levels[parent_index]); - state.repetition_levels.push_back(parent->repetition_levels[parent_index]); - state.is_empty.push_back(true); - continue; - } - auto first_repeat_level = - parent && !parent->repetition_levels.empty() ? parent->repetition_levels[parent_index] : max_repeat; - if (parent && parent->definition_levels[parent_index] != PARQUET_DEFINE_VALID) { - state.definition_levels.push_back(parent->definition_levels[parent_index]); - state.repetition_levels.push_back(first_repeat_level); - state.is_empty.push_back(true); - } else if (validity.RowIsValid(vector_index)) { - // push the repetition levels - if (list_data[vector_index].length == 0) { - state.definition_levels.push_back(max_define); - state.is_empty.push_back(true); - } else { - state.definition_levels.push_back(PARQUET_DEFINE_VALID); - state.is_empty.push_back(false); - } - state.repetition_levels.push_back(first_repeat_level); - for (idx_t k = 1; k < list_data[vector_index].length; k++) { - state.repetition_levels.push_back(max_repeat + 1); - state.definition_levels.push_back(PARQUET_DEFINE_VALID); - state.is_empty.push_back(false); - } - } else { - if (!can_have_nulls) { - throw IOException("Parquet writer: map key column is not allowed to contain NULL values"); - } - state.definition_levels.push_back(max_define - 1); - state.repetition_levels.push_back(first_repeat_level); - state.is_empty.push_back(true); - } - vector_index++; - } - state.parent_index += vcount; - - auto &list_child = ListVector::GetEntry(vector); - auto list_count = ListVector::GetListSize(vector); - child_writer->Prepare(*state.child_state, &state_p, list_child, list_count); -} - -void ListColumnWriter::BeginWrite(ColumnWriterState &state_p) { - auto &state = (ListColumnWriterState &)state_p; - child_writer->BeginWrite(*state.child_state); -} - -void ListColumnWriter::Write(ColumnWriterState &state_p, Vector &vector, idx_t count) { - auto &state = (ListColumnWriterState &)state_p; - - auto &list_child = ListVector::GetEntry(vector); - auto list_count = ListVector::GetListSize(vector); - child_writer->Write(*state.child_state, list_child, list_count); -} - -void ListColumnWriter::FinalizeWrite(ColumnWriterState &state_p) { - auto &state = (ListColumnWriterState &)state_p; - child_writer->FinalizeWrite(*state.child_state); -} - -//===--------------------------------------------------------------------===// -// Create Column Writer -//===--------------------------------------------------------------------===// -unique_ptr ColumnWriter::CreateWriterRecursive(vector &schemas, - ParquetWriter &writer, const LogicalType &type, - const string &name, vector schema_path, - idx_t max_repeat, idx_t max_define, bool can_have_nulls) { - auto null_type = can_have_nulls ? FieldRepetitionType::OPTIONAL : FieldRepetitionType::REQUIRED; - if (!can_have_nulls) { - max_define--; - } - idx_t schema_idx = schemas.size(); - if (type.id() == LogicalTypeId::STRUCT) { - auto &child_types = StructType::GetChildTypes(type); - // set up the schema element for this struct - duckdb_parquet::format::SchemaElement schema_element; - schema_element.repetition_type = null_type; - schema_element.num_children = child_types.size(); - schema_element.__isset.num_children = true; - schema_element.__isset.type = false; - schema_element.__isset.repetition_type = true; - schema_element.name = name; - schemas.push_back(move(schema_element)); - schema_path.push_back(name); - - // construct the child types recursively - vector> child_writers; - child_writers.reserve(child_types.size()); - for (auto &child_type : child_types) { - child_writers.push_back(CreateWriterRecursive(schemas, writer, child_type.second, child_type.first, - schema_path, max_repeat, max_define + 1)); - } - return make_unique(writer, schema_idx, move(schema_path), max_repeat, max_define, - move(child_writers), can_have_nulls); - } - if (type.id() == LogicalTypeId::LIST) { - auto &child_type = ListType::GetChildType(type); - // set up the two schema elements for the list - // for some reason we only set the converted type in the OPTIONAL element - // first an OPTIONAL element - duckdb_parquet::format::SchemaElement optional_element; - optional_element.repetition_type = null_type; - optional_element.num_children = 1; - optional_element.converted_type = ConvertedType::LIST; - optional_element.__isset.num_children = true; - optional_element.__isset.type = false; - optional_element.__isset.repetition_type = true; - optional_element.__isset.converted_type = true; - optional_element.name = name; - schemas.push_back(move(optional_element)); - schema_path.push_back(name); - - // then a REPEATED element - duckdb_parquet::format::SchemaElement repeated_element; - repeated_element.repetition_type = FieldRepetitionType::REPEATED; - repeated_element.num_children = 1; - repeated_element.__isset.num_children = true; - repeated_element.__isset.type = false; - repeated_element.__isset.repetition_type = true; - repeated_element.name = "list"; - schemas.push_back(move(repeated_element)); - schema_path.emplace_back("list"); - - auto child_writer = - CreateWriterRecursive(schemas, writer, child_type, "element", schema_path, max_repeat + 1, max_define + 2); - return make_unique(writer, schema_idx, move(schema_path), max_repeat, max_define, - move(child_writer), can_have_nulls); - } - if (type.id() == LogicalTypeId::MAP) { - // map type - // maps are stored as follows: - // group (MAP) { - // repeated group key_value { - // required key; - // value; - // } - // } - // top map element - duckdb_parquet::format::SchemaElement top_element; - top_element.repetition_type = null_type; - top_element.num_children = 1; - top_element.converted_type = ConvertedType::MAP; - top_element.__isset.repetition_type = true; - top_element.__isset.num_children = true; - top_element.__isset.converted_type = true; - top_element.__isset.type = false; - top_element.name = name; - schemas.push_back(move(top_element)); - schema_path.push_back(name); - - // key_value element - duckdb_parquet::format::SchemaElement kv_element; - kv_element.repetition_type = FieldRepetitionType::REPEATED; - kv_element.num_children = 2; - kv_element.__isset.repetition_type = true; - kv_element.__isset.num_children = true; - kv_element.__isset.type = false; - kv_element.name = "key_value"; - schemas.push_back(move(kv_element)); - schema_path.emplace_back("key_value"); - - // construct the child types recursively - vector kv_types {MapType::KeyType(type), MapType::ValueType(type)}; - vector kv_names {"key", "value"}; - vector> child_writers; - child_writers.reserve(2); - for (idx_t i = 0; i < 2; i++) { - // key needs to be marked as REQUIRED - bool is_key = i == 0; - auto child_writer = CreateWriterRecursive(schemas, writer, kv_types[i], kv_names[i], schema_path, - max_repeat + 1, max_define + 2, !is_key); - auto list_writer = make_unique(writer, schema_idx, schema_path, max_repeat, max_define, - move(child_writer), can_have_nulls); - child_writers.push_back(move(list_writer)); - } - return make_unique(writer, schema_idx, schema_path, max_repeat, max_define, - move(child_writers), can_have_nulls); - } - duckdb_parquet::format::SchemaElement schema_element; - schema_element.type = ParquetWriter::DuckDBTypeToParquetType(type); - schema_element.repetition_type = null_type; - schema_element.num_children = 0; - schema_element.__isset.num_children = true; - schema_element.__isset.type = true; - schema_element.__isset.repetition_type = true; - schema_element.name = name; - ParquetWriter::SetSchemaProperties(type, schema_element); - schemas.push_back(move(schema_element)); - schema_path.push_back(name); - - switch (type.id()) { - case LogicalTypeId::BOOLEAN: - return make_unique(writer, schema_idx, move(schema_path), max_repeat, max_define, - can_have_nulls); - case LogicalTypeId::TINYINT: - return make_unique>(writer, schema_idx, move(schema_path), max_repeat, - max_define, can_have_nulls); - case LogicalTypeId::SMALLINT: - return make_unique>(writer, schema_idx, move(schema_path), max_repeat, - max_define, can_have_nulls); - case LogicalTypeId::INTEGER: - case LogicalTypeId::DATE: - return make_unique>(writer, schema_idx, move(schema_path), max_repeat, - max_define, can_have_nulls); - case LogicalTypeId::BIGINT: - case LogicalTypeId::TIME: - case LogicalTypeId::TIME_TZ: - case LogicalTypeId::TIMESTAMP: - case LogicalTypeId::TIMESTAMP_TZ: - case LogicalTypeId::TIMESTAMP_MS: - return make_unique>(writer, schema_idx, move(schema_path), max_repeat, - max_define, can_have_nulls); - case LogicalTypeId::HUGEINT: - return make_unique>( - writer, schema_idx, move(schema_path), max_repeat, max_define, can_have_nulls); - case LogicalTypeId::TIMESTAMP_NS: - return make_unique>( - writer, schema_idx, move(schema_path), max_repeat, max_define, can_have_nulls); - case LogicalTypeId::TIMESTAMP_SEC: - return make_unique>( - writer, schema_idx, move(schema_path), max_repeat, max_define, can_have_nulls); - case LogicalTypeId::UTINYINT: - return make_unique>(writer, schema_idx, move(schema_path), max_repeat, - max_define, can_have_nulls); - case LogicalTypeId::USMALLINT: - return make_unique>(writer, schema_idx, move(schema_path), max_repeat, - max_define, can_have_nulls); - case LogicalTypeId::UINTEGER: - return make_unique>(writer, schema_idx, move(schema_path), max_repeat, - max_define, can_have_nulls); - case LogicalTypeId::UBIGINT: - return make_unique>(writer, schema_idx, move(schema_path), max_repeat, - max_define, can_have_nulls); - case LogicalTypeId::FLOAT: - return make_unique>(writer, schema_idx, move(schema_path), max_repeat, - max_define, can_have_nulls); - case LogicalTypeId::DOUBLE: - return make_unique>(writer, schema_idx, move(schema_path), max_repeat, - max_define, can_have_nulls); - case LogicalTypeId::DECIMAL: - switch (type.InternalType()) { - case PhysicalType::INT16: - return make_unique>(writer, schema_idx, move(schema_path), - max_repeat, max_define, can_have_nulls); - case PhysicalType::INT32: - return make_unique>(writer, schema_idx, move(schema_path), - max_repeat, max_define, can_have_nulls); - case PhysicalType::INT64: - return make_unique>(writer, schema_idx, move(schema_path), - max_repeat, max_define, can_have_nulls); - default: - return make_unique(writer, schema_idx, move(schema_path), max_repeat, max_define, - can_have_nulls); - } - case LogicalTypeId::BLOB: - case LogicalTypeId::VARCHAR: - case LogicalTypeId::JSON: - return make_unique(writer, schema_idx, move(schema_path), max_repeat, max_define, - can_have_nulls); - case LogicalTypeId::UUID: - return make_unique(writer, schema_idx, move(schema_path), max_repeat, max_define, - can_have_nulls); - case LogicalTypeId::INTERVAL: - return make_unique(writer, schema_idx, move(schema_path), max_repeat, max_define, - can_have_nulls); - case LogicalTypeId::ENUM: - return make_unique(writer, type, schema_idx, move(schema_path), max_repeat, max_define, - can_have_nulls); - default: - throw InternalException("Unsupported type \"%s\" in Parquet writer", type.ToString()); - } -} - -} // namespace duckdb -#define DUCKDB_EXTENSION_MAIN - -#include -#include -#include -#include - - - - -//===----------------------------------------------------------------------===// -// DuckDB -// -// parquet_metadata.hpp -// -// -//===----------------------------------------------------------------------===// - - - - - -namespace duckdb { - -class ParquetMetaDataFunction : public TableFunction { -public: - ParquetMetaDataFunction(); -}; - -class ParquetSchemaFunction : public TableFunction { -public: - ParquetSchemaFunction(); -}; - -} // namespace duckdb - -//===----------------------------------------------------------------------===// -// DuckDB -// -// zstd_file_system.hpp -// -// -//===----------------------------------------------------------------------===// - - - -#include "duckdb.hpp" -#ifndef DUCKDB_AMALGAMATION -#include "duckdb/common/compressed_file_system.hpp" -#endif - -namespace duckdb { - -class ZStdFileSystem : public CompressedFileSystem { -public: - unique_ptr OpenCompressedFile(unique_ptr handle, bool write) override; - - std::string GetName() const override { - return "ZStdFileSystem"; - } - - unique_ptr CreateStream() override; - idx_t InBufferSize() override; - idx_t OutBufferSize() override; -}; - -} // namespace duckdb - - -#include "duckdb.hpp" -#ifndef DUCKDB_AMALGAMATION -#include "duckdb/common/file_system.hpp" -#include "duckdb/common/types/chunk_collection.hpp" -#include "duckdb/function/copy_function.hpp" -#include "duckdb/function/table_function.hpp" -#include "duckdb/common/file_system.hpp" -#include "duckdb/parser/parsed_data/create_copy_function_info.hpp" -#include "duckdb/parser/parsed_data/create_table_function_info.hpp" - -#include "duckdb/common/enums/file_compression_type.hpp" -#include "duckdb/main/config.hpp" -#include "duckdb/parser/expression/constant_expression.hpp" -#include "duckdb/parser/expression/function_expression.hpp" -#include "duckdb/parser/tableref/table_function_ref.hpp" - -#include "duckdb/storage/statistics/base_statistics.hpp" - -#include "duckdb/main/client_context.hpp" -#include "duckdb/catalog/catalog.hpp" -#endif - -namespace duckdb { - -struct ParquetReadBindData : public TableFunctionData { - shared_ptr initial_reader; - vector files; - vector column_ids; - atomic chunk_count; - atomic cur_file; - vector names; - vector types; -}; - -struct ParquetReadLocalState : public LocalTableFunctionState { - shared_ptr reader; - ParquetReaderScanState scan_state; - bool is_parallel; - idx_t batch_index; - idx_t file_index; - vector column_ids; - TableFilterSet *table_filters; -}; - -struct ParquetReadGlobalState : public GlobalTableFunctionState { - mutex lock; - shared_ptr current_reader; - idx_t batch_index; - idx_t file_index; - idx_t row_group_index; - idx_t max_threads; - - idx_t MaxThreads() const override { - return max_threads; - } -}; - -class ParquetScanFunction { -public: - static TableFunctionSet GetFunctionSet() { - TableFunctionSet set("parquet_scan"); - TableFunction table_function({LogicalType::VARCHAR}, ParquetScanImplementation, ParquetScanBind, - ParquetScanInitGlobal, ParquetScanInitLocal); - table_function.statistics = ParquetScanStats; - table_function.cardinality = ParquetCardinality; - table_function.table_scan_progress = ParquetProgress; - table_function.named_parameters["binary_as_string"] = LogicalType::BOOLEAN; - table_function.get_batch_index = ParquetScanGetBatchIndex; - table_function.projection_pushdown = true; - table_function.filter_pushdown = true; - set.AddFunction(table_function); - table_function.arguments = {LogicalType::LIST(LogicalType::VARCHAR)}; - table_function.bind = ParquetScanBindList; - table_function.named_parameters["binary_as_string"] = LogicalType::BOOLEAN; - set.AddFunction(table_function); - return set; - } - - static unique_ptr ParquetReadBind(ClientContext &context, CopyInfo &info, - vector &expected_names, - vector &expected_types) { - D_ASSERT(expected_names.size() == expected_types.size()); - for (auto &option : info.options) { - auto loption = StringUtil::Lower(option.first); - if (loption == "compression" || loption == "codec") { - // CODEC option has no effect on parquet read: we determine codec from the file - continue; - } else { - throw NotImplementedException("Unsupported option for COPY FROM parquet: %s", option.first); - } - } - auto result = make_unique(); - - FileSystem &fs = FileSystem::GetFileSystem(context); - result->files = fs.Glob(info.file_path, context); - if (result->files.empty()) { - throw IOException("No files found that match the pattern \"%s\"", info.file_path); - } - ParquetOptions parquet_options(context); - result->initial_reader = make_shared(context, result->files[0], expected_types, parquet_options); - result->names = result->initial_reader->names; - result->types = result->initial_reader->return_types; - return move(result); - } - - static unique_ptr ParquetScanStats(ClientContext &context, const FunctionData *bind_data_p, - column_t column_index) { - auto &bind_data = (ParquetReadBindData &)*bind_data_p; - - if (column_index == COLUMN_IDENTIFIER_ROW_ID) { - return nullptr; - } - - // we do not want to parse the Parquet metadata for the sole purpose of getting column statistics - - // We already parsed the metadata for the first file in a glob because we need some type info. - auto overall_stats = ParquetReader::ReadStatistics( - *bind_data.initial_reader, bind_data.initial_reader->return_types[column_index], column_index, - bind_data.initial_reader->metadata->metadata.get()); - - if (!overall_stats) { - return nullptr; - } - - // if there is only one file in the glob (quite common case), we are done - auto &config = DBConfig::GetConfig(context); - if (bind_data.files.size() < 2) { - return overall_stats; - } else if (config.object_cache_enable) { - auto &cache = ObjectCache::GetObjectCache(context); - // for more than one file, we could be lucky and metadata for *every* file is in the object cache (if - // enabled at all) - FileSystem &fs = FileSystem::GetFileSystem(context); - for (idx_t file_idx = 1; file_idx < bind_data.files.size(); file_idx++) { - auto &file_name = bind_data.files[file_idx]; - auto metadata = cache.Get(file_name); - if (!metadata) { - // missing metadata entry in cache, no usable stats - return nullptr; - } - auto handle = fs.OpenFile(file_name, FileFlags::FILE_FLAGS_READ, FileSystem::DEFAULT_LOCK, - FileSystem::DEFAULT_COMPRESSION, FileSystem::GetFileOpener(context)); - // we need to check if the metadata cache entries are current - if (fs.GetLastModifiedTime(*handle) >= metadata->read_time) { - // missing or invalid metadata entry in cache, no usable stats overall - return nullptr; - } - // get and merge stats for file - auto file_stats = ParquetReader::ReadStatistics(*bind_data.initial_reader, - bind_data.initial_reader->return_types[column_index], - column_index, metadata->metadata.get()); - if (!file_stats) { - return nullptr; - } - overall_stats->Merge(*file_stats); - } - // success! - return overall_stats; - } - // we have more than one file and no object cache so no statistics overall - return nullptr; - } - - static unique_ptr ParquetScanBindInternal(ClientContext &context, vector files, - vector &return_types, vector &names, - ParquetOptions parquet_options) { - auto result = make_unique(); - result->files = move(files); - - result->initial_reader = make_shared(context, result->files[0], parquet_options); - return_types = result->types = result->initial_reader->return_types; - names = result->names = result->initial_reader->names; - return move(result); - } - - static vector ParquetGlob(FileSystem &fs, const string &glob, ClientContext &context) { - auto files = fs.Glob(glob, FileSystem::GetFileOpener(context)); - if (files.empty()) { - throw IOException("No files found that match the pattern \"%s\"", glob); - } - return files; - } - - static unique_ptr ParquetScanBind(ClientContext &context, TableFunctionBindInput &input, - vector &return_types, vector &names) { - auto &config = DBConfig::GetConfig(context); - if (!config.enable_external_access) { - throw PermissionException("Scanning Parquet files is disabled through configuration"); - } - auto file_name = input.inputs[0].GetValue(); - ParquetOptions parquet_options(context); - for (auto &kv : input.named_parameters) { - if (kv.first == "binary_as_string") { - parquet_options.binary_as_string = BooleanValue::Get(kv.second); - } - } - FileSystem &fs = FileSystem::GetFileSystem(context); - auto files = ParquetGlob(fs, file_name, context); - return ParquetScanBindInternal(context, move(files), return_types, names, parquet_options); - } - - static unique_ptr ParquetScanBindList(ClientContext &context, TableFunctionBindInput &input, - vector &return_types, vector &names) { - auto &config = DBConfig::GetConfig(context); - if (!config.enable_external_access) { - throw PermissionException("Scanning Parquet files is disabled through configuration"); - } - FileSystem &fs = FileSystem::GetFileSystem(context); - vector files; - for (auto &val : ListValue::GetChildren(input.inputs[0])) { - auto glob_files = ParquetGlob(fs, val.ToString(), context); - files.insert(files.end(), glob_files.begin(), glob_files.end()); - } - if (files.empty()) { - throw IOException("Parquet reader needs at least one file to read"); - } - ParquetOptions parquet_options(context); - for (auto &kv : input.named_parameters) { - if (kv.first == "binary_as_string") { - parquet_options.binary_as_string = BooleanValue::Get(kv.second); - } - } - return ParquetScanBindInternal(context, move(files), return_types, names, parquet_options); - } - - static double ParquetProgress(ClientContext &context, const FunctionData *bind_data_p, - const GlobalTableFunctionState *global_state) { - auto &bind_data = (ParquetReadBindData &)*bind_data_p; - if (bind_data.initial_reader->NumRows() == 0) { - return (100.0 * (bind_data.cur_file + 1)) / bind_data.files.size(); - } - auto percentage = (bind_data.chunk_count * STANDARD_VECTOR_SIZE * 100.0 / bind_data.initial_reader->NumRows()) / - bind_data.files.size(); - percentage += 100.0 * bind_data.cur_file / bind_data.files.size(); - return percentage; - } - - static unique_ptr - ParquetScanInitLocal(ExecutionContext &context, TableFunctionInitInput &input, GlobalTableFunctionState *gstate_p) { - auto &bind_data = (ParquetReadBindData &)*input.bind_data; - auto &gstate = (ParquetReadGlobalState &)*gstate_p; - - auto result = make_unique(); - result->column_ids = input.column_ids; - result->is_parallel = true; - result->batch_index = 0; - result->table_filters = input.filters; - if (!ParquetParallelStateNext(context.client, bind_data, *result, gstate)) { - return nullptr; - } - return move(result); - } - - static unique_ptr ParquetScanInitGlobal(ClientContext &context, - TableFunctionInitInput &input) { - auto &bind_data = (ParquetReadBindData &)*input.bind_data; - auto result = make_unique(); - result->current_reader = bind_data.initial_reader; - result->row_group_index = 0; - result->file_index = 0; - result->batch_index = 0; - result->max_threads = ParquetScanMaxThreads(context, input.bind_data); - return move(result); - } - - static idx_t ParquetScanGetBatchIndex(ClientContext &context, const FunctionData *bind_data_p, - LocalTableFunctionState *local_state, - GlobalTableFunctionState *global_state) { - auto &data = (ParquetReadLocalState &)*local_state; - return data.batch_index; - } - - static void ParquetScanImplementation(ClientContext &context, TableFunctionInput &data_p, DataChunk &output) { - if (!data_p.local_state) { - return; - } - auto &data = (ParquetReadLocalState &)*data_p.local_state; - auto &gstate = (ParquetReadGlobalState &)*data_p.global_state; - auto &bind_data = (ParquetReadBindData &)*data_p.bind_data; - - do { - data.reader->Scan(data.scan_state, output); - bind_data.chunk_count++; - if (output.size() > 0) { - return; - } - if (!ParquetParallelStateNext(context, bind_data, data, gstate)) { - return; - } - } while (true); - } - - static unique_ptr ParquetCardinality(ClientContext &context, const FunctionData *bind_data) { - auto &data = (ParquetReadBindData &)*bind_data; - return make_unique(data.initial_reader->NumRows() * data.files.size()); - } - - static idx_t ParquetScanMaxThreads(ClientContext &context, const FunctionData *bind_data) { - auto &data = (ParquetReadBindData &)*bind_data; - return data.initial_reader->NumRowGroups() * data.files.size(); - } - - static bool ParquetParallelStateNext(ClientContext &context, const ParquetReadBindData &bind_data, - ParquetReadLocalState &scan_data, ParquetReadGlobalState ¶llel_state) { - - lock_guard parallel_lock(parallel_state.lock); - if (parallel_state.row_group_index < parallel_state.current_reader->NumRowGroups()) { - // groups remain in the current parquet file: read the next group - scan_data.reader = parallel_state.current_reader; - vector group_indexes {parallel_state.row_group_index}; - scan_data.reader->InitializeScan(scan_data.scan_state, scan_data.column_ids, group_indexes, - scan_data.table_filters); - scan_data.batch_index = parallel_state.batch_index++; - scan_data.file_index = parallel_state.file_index; - parallel_state.row_group_index++; - return true; - } else { - // no groups remain in the current parquet file: check if there are more files to read - while (parallel_state.file_index + 1 < bind_data.files.size()) { - // read the next file - string file = bind_data.files[++parallel_state.file_index]; - parallel_state.current_reader = - make_shared(context, file, bind_data.names, bind_data.types, scan_data.column_ids, - parallel_state.current_reader->parquet_options, bind_data.files[0]); - if (parallel_state.current_reader->NumRowGroups() == 0) { - // empty parquet file, move to next file - continue; - } - // set up the scan state to read the first group - scan_data.reader = parallel_state.current_reader; - vector group_indexes {0}; - scan_data.reader->InitializeScan(scan_data.scan_state, scan_data.column_ids, group_indexes, - scan_data.table_filters); - scan_data.batch_index = parallel_state.batch_index++; - scan_data.file_index = parallel_state.file_index; - parallel_state.row_group_index = 1; - return true; - } - } - return false; - } -}; - -struct ParquetWriteBindData : public TableFunctionData { - vector sql_types; - string file_name; - vector column_names; - duckdb_parquet::format::CompressionCodec::type codec = duckdb_parquet::format::CompressionCodec::SNAPPY; - idx_t row_group_size = 100000; -}; - -struct ParquetWriteGlobalState : public GlobalFunctionData { - unique_ptr writer; -}; - -struct ParquetWriteLocalState : public LocalFunctionData { - explicit ParquetWriteLocalState(Allocator &allocator) { - buffer = make_unique(allocator); - } - - unique_ptr buffer; -}; - -unique_ptr ParquetWriteBind(ClientContext &context, CopyInfo &info, vector &names, - vector &sql_types) { - auto bind_data = make_unique(); - for (auto &option : info.options) { - auto loption = StringUtil::Lower(option.first); - if (loption == "row_group_size" || loption == "chunk_size") { - bind_data->row_group_size = option.second[0].GetValue(); - } else if (loption == "compression" || loption == "codec") { - if (!option.second.empty()) { - auto roption = StringUtil::Lower(option.second[0].ToString()); - if (roption == "uncompressed") { - bind_data->codec = duckdb_parquet::format::CompressionCodec::UNCOMPRESSED; - continue; - } else if (roption == "snappy") { - bind_data->codec = duckdb_parquet::format::CompressionCodec::SNAPPY; - continue; - } else if (roption == "gzip") { - bind_data->codec = duckdb_parquet::format::CompressionCodec::GZIP; - continue; - } else if (roption == "zstd") { - bind_data->codec = duckdb_parquet::format::CompressionCodec::ZSTD; - continue; - } - } - throw ParserException("Expected %s argument to be either [uncompressed, snappy, gzip or zstd]", loption); - } else { - throw NotImplementedException("Unrecognized option for PARQUET: %s", option.first.c_str()); - } - } - bind_data->sql_types = sql_types; - bind_data->column_names = names; - bind_data->file_name = info.file_path; - return move(bind_data); -} - -unique_ptr ParquetWriteInitializeGlobal(ClientContext &context, FunctionData &bind_data, - const string &file_path) { - auto global_state = make_unique(); - auto &parquet_bind = (ParquetWriteBindData &)bind_data; - - auto &fs = FileSystem::GetFileSystem(context); - global_state->writer = - make_unique(fs, file_path, FileSystem::GetFileOpener(context), parquet_bind.sql_types, - parquet_bind.column_names, parquet_bind.codec); - return move(global_state); -} - -void ParquetWriteSink(ExecutionContext &context, FunctionData &bind_data_p, GlobalFunctionData &gstate, - LocalFunctionData &lstate, DataChunk &input) { - auto &bind_data = (ParquetWriteBindData &)bind_data_p; - auto &global_state = (ParquetWriteGlobalState &)gstate; - auto &local_state = (ParquetWriteLocalState &)lstate; - - // append data to the local (buffered) chunk collection - local_state.buffer->Append(input); - if (local_state.buffer->Count() > bind_data.row_group_size) { - // if the chunk collection exceeds a certain size we flush it to the parquet file - global_state.writer->Flush(*local_state.buffer); - // and reset the buffer - local_state.buffer = make_unique(Allocator::Get(context.client)); - } -} - -void ParquetWriteCombine(ExecutionContext &context, FunctionData &bind_data, GlobalFunctionData &gstate, - LocalFunctionData &lstate) { - auto &global_state = (ParquetWriteGlobalState &)gstate; - auto &local_state = (ParquetWriteLocalState &)lstate; - // flush any data left in the local state to the file - global_state.writer->Flush(*local_state.buffer); -} - -void ParquetWriteFinalize(ClientContext &context, FunctionData &bind_data, GlobalFunctionData &gstate) { - auto &global_state = (ParquetWriteGlobalState &)gstate; - // finalize: write any additional metadata to the file here - global_state.writer->Finalize(); -} - -unique_ptr ParquetWriteInitializeLocal(ExecutionContext &context, FunctionData &bind_data) { - return make_unique(Allocator::Get(context.client)); -} - -unique_ptr ParquetScanReplacement(ClientContext &context, const string &table_name, - ReplacementScanData *data) { - if (!StringUtil::EndsWith(StringUtil::Lower(table_name), ".parquet")) { - return nullptr; - } - auto table_function = make_unique(); - vector> children; - children.push_back(make_unique(Value(table_name))); - table_function->function = make_unique("parquet_scan", move(children)); - return table_function; -} - -void ParquetExtension::Load(DuckDB &db) { - auto &fs = db.GetFileSystem(); - fs.RegisterSubSystem(FileCompressionType::ZSTD, make_unique()); - - auto scan_fun = ParquetScanFunction::GetFunctionSet(); - CreateTableFunctionInfo cinfo(scan_fun); - cinfo.name = "read_parquet"; - CreateTableFunctionInfo pq_scan = cinfo; - pq_scan.name = "parquet_scan"; - - ParquetMetaDataFunction meta_fun; - CreateTableFunctionInfo meta_cinfo(meta_fun); - - ParquetSchemaFunction schema_fun; - CreateTableFunctionInfo schema_cinfo(schema_fun); - - CopyFunction function("parquet"); - function.copy_to_bind = ParquetWriteBind; - function.copy_to_initialize_global = ParquetWriteInitializeGlobal; - function.copy_to_initialize_local = ParquetWriteInitializeLocal; - function.copy_to_sink = ParquetWriteSink; - function.copy_to_combine = ParquetWriteCombine; - function.copy_to_finalize = ParquetWriteFinalize; - function.copy_from_bind = ParquetScanFunction::ParquetReadBind; - function.copy_from_function = scan_fun.functions[0]; - - function.extension = "parquet"; - CreateCopyFunctionInfo info(function); - - Connection con(db); - con.BeginTransaction(); - auto &context = *con.context; - auto &catalog = Catalog::GetCatalog(context); - catalog.CreateCopyFunction(context, &info); - catalog.CreateTableFunction(context, &cinfo); - catalog.CreateTableFunction(context, &pq_scan); - catalog.CreateTableFunction(context, &meta_cinfo); - catalog.CreateTableFunction(context, &schema_cinfo); - con.Commit(); - - auto &config = DBConfig::GetConfig(*db.instance); - config.replacement_scans.emplace_back(ParquetScanReplacement); - config.AddExtensionOption("binary_as_string", "In Parquet files, interpret binary data as a string.", - LogicalType::BOOLEAN); -} - -std::string ParquetExtension::Name() { - return "parquet"; -} - -} // namespace duckdb - -#ifndef DUCKDB_EXTENSION_MAIN -#error DUCKDB_EXTENSION_MAIN not defined -#endif - - - -#include - -#ifndef DUCKDB_AMALGAMATION -#include "duckdb/common/types/blob.hpp" -#include "duckdb/main/config.hpp" -#endif - -namespace duckdb { - -struct ParquetMetaDataBindData : public TableFunctionData { - vector return_types; - vector files; - -public: - bool Equals(const FunctionData &other_p) const override { - auto &other = (const ParquetMetaDataBindData &)other_p; - return other.return_types == return_types && files == other.files; - } -}; - -struct ParquetMetaDataOperatorData : public GlobalTableFunctionState { - explicit ParquetMetaDataOperatorData(Allocator &allocator) : collection(allocator) { - } - - idx_t file_index; - ChunkCollection collection; - - static void BindMetaData(vector &return_types, vector &names); - static void BindSchema(vector &return_types, vector &names); - - void LoadFileMetaData(ClientContext &context, const vector &return_types, const string &file_path); - void LoadSchemaData(ClientContext &context, const vector &return_types, const string &file_path); -}; - -template -string ConvertParquetElementToString(T &&entry) { - std::stringstream ss; - ss << entry; - return ss.str(); -} - -template -string PrintParquetElementToString(T &&entry) { - std::stringstream ss; - entry.printTo(ss); - return ss.str(); -} - -void ParquetMetaDataOperatorData::BindMetaData(vector &return_types, vector &names) { - names.emplace_back("file_name"); - return_types.emplace_back(LogicalType::VARCHAR); - - names.emplace_back("row_group_id"); - return_types.emplace_back(LogicalType::BIGINT); - - names.emplace_back("row_group_num_rows"); - return_types.emplace_back(LogicalType::BIGINT); - - names.emplace_back("row_group_num_columns"); - return_types.emplace_back(LogicalType::BIGINT); - - names.emplace_back("row_group_bytes"); - return_types.emplace_back(LogicalType::BIGINT); - - names.emplace_back("column_id"); - return_types.emplace_back(LogicalType::BIGINT); - - names.emplace_back("file_offset"); - return_types.emplace_back(LogicalType::BIGINT); - - names.emplace_back("num_values"); - return_types.emplace_back(LogicalType::BIGINT); - - names.emplace_back("path_in_schema"); - return_types.emplace_back(LogicalType::VARCHAR); - - names.emplace_back("type"); - return_types.emplace_back(LogicalType::VARCHAR); - - names.emplace_back("stats_min"); - return_types.emplace_back(LogicalType::VARCHAR); - - names.emplace_back("stats_max"); - return_types.emplace_back(LogicalType::VARCHAR); - - names.emplace_back("stats_null_count"); - return_types.emplace_back(LogicalType::BIGINT); - - names.emplace_back("stats_distinct_count"); - return_types.emplace_back(LogicalType::BIGINT); - - names.emplace_back("stats_min_value"); - return_types.emplace_back(LogicalType::VARCHAR); - - names.emplace_back("stats_max_value"); - return_types.emplace_back(LogicalType::VARCHAR); - - names.emplace_back("compression"); - return_types.emplace_back(LogicalType::VARCHAR); - - names.emplace_back("encodings"); - return_types.emplace_back(LogicalType::VARCHAR); - - names.emplace_back("index_page_offset"); - return_types.emplace_back(LogicalType::BIGINT); - - names.emplace_back("dictionary_page_offset"); - return_types.emplace_back(LogicalType::BIGINT); - - names.emplace_back("data_page_offset"); - return_types.emplace_back(LogicalType::BIGINT); - - names.emplace_back("total_compressed_size"); - return_types.emplace_back(LogicalType::BIGINT); - - names.emplace_back("total_uncompressed_size"); - return_types.emplace_back(LogicalType::BIGINT); -} - -Value ConvertParquetStats(const LogicalType &type, const duckdb_parquet::format::SchemaElement &schema_ele, - bool stats_is_set, const std::string &stats) { - if (!stats_is_set) { - return Value(LogicalType::VARCHAR); - } - return ParquetStatisticsUtils::ConvertValue(type, schema_ele, stats).CastAs(LogicalType::VARCHAR); -} - -void ParquetMetaDataOperatorData::LoadFileMetaData(ClientContext &context, const vector &return_types, - const string &file_path) { - collection.Reset(); - ParquetOptions parquet_options(context); - auto reader = make_unique(context, file_path, parquet_options); - idx_t count = 0; - DataChunk current_chunk; - current_chunk.Initialize(context, return_types); - auto meta_data = reader->GetFileMetadata(); - vector column_types; - vector schema_indexes; - for (idx_t schema_idx = 0; schema_idx < meta_data->schema.size(); schema_idx++) { - auto &schema_element = meta_data->schema[schema_idx]; - if (schema_element.num_children > 0) { - continue; - } - column_types.push_back(ParquetReader::DeriveLogicalType(schema_element, false)); - schema_indexes.push_back(schema_idx); - } - - for (idx_t row_group_idx = 0; row_group_idx < meta_data->row_groups.size(); row_group_idx++) { - auto &row_group = meta_data->row_groups[row_group_idx]; - - if (row_group.columns.size() > column_types.size()) { - throw InternalException("Too many column in row group: corrupt file?"); - } - for (idx_t col_idx = 0; col_idx < row_group.columns.size(); col_idx++) { - auto &column = row_group.columns[col_idx]; - auto &col_meta = column.meta_data; - auto &stats = col_meta.statistics; - auto &schema_element = meta_data->schema[schema_indexes[col_idx]]; - auto &column_type = column_types[col_idx]; - - // file_name, LogicalType::VARCHAR - current_chunk.SetValue(0, count, file_path); - - // row_group_id, LogicalType::BIGINT - current_chunk.SetValue(1, count, Value::BIGINT(row_group_idx)); - - // row_group_num_rows, LogicalType::BIGINT - current_chunk.SetValue(2, count, Value::BIGINT(row_group.num_rows)); - - // row_group_num_columns, LogicalType::BIGINT - current_chunk.SetValue(3, count, Value::BIGINT(row_group.columns.size())); - - // row_group_bytes, LogicalType::BIGINT - current_chunk.SetValue(4, count, Value::BIGINT(row_group.total_byte_size)); - - // column_id, LogicalType::BIGINT - current_chunk.SetValue(5, count, Value::BIGINT(col_idx)); - - // file_offset, LogicalType::BIGINT - current_chunk.SetValue(6, count, Value::BIGINT(column.file_offset)); - - // num_values, LogicalType::BIGINT - current_chunk.SetValue(7, count, Value::BIGINT(col_meta.num_values)); - - // path_in_schema, LogicalType::VARCHAR - current_chunk.SetValue(8, count, StringUtil::Join(col_meta.path_in_schema, ", ")); - - // type, LogicalType::VARCHAR - current_chunk.SetValue(9, count, ConvertParquetElementToString(col_meta.type)); - - // stats_min, LogicalType::VARCHAR - current_chunk.SetValue(10, count, - ConvertParquetStats(column_type, schema_element, stats.__isset.min, stats.min)); - - // stats_max, LogicalType::VARCHAR - current_chunk.SetValue(11, count, - ConvertParquetStats(column_type, schema_element, stats.__isset.max, stats.max)); - - // stats_null_count, LogicalType::BIGINT - current_chunk.SetValue( - 12, count, stats.__isset.null_count ? Value::BIGINT(stats.null_count) : Value(LogicalType::BIGINT)); - - // stats_distinct_count, LogicalType::BIGINT - current_chunk.SetValue(13, count, - stats.__isset.distinct_count ? Value::BIGINT(stats.distinct_count) - : Value(LogicalType::BIGINT)); - - // stats_min_value, LogicalType::VARCHAR - current_chunk.SetValue( - 14, count, ConvertParquetStats(column_type, schema_element, stats.__isset.min_value, stats.min_value)); - - // stats_max_value, LogicalType::VARCHAR - current_chunk.SetValue( - 15, count, ConvertParquetStats(column_type, schema_element, stats.__isset.max_value, stats.max_value)); - - // compression, LogicalType::VARCHAR - current_chunk.SetValue(16, count, ConvertParquetElementToString(col_meta.codec)); - - // encodings, LogicalType::VARCHAR - vector encoding_string; - for (auto &encoding : col_meta.encodings) { - encoding_string.push_back(ConvertParquetElementToString(encoding)); - } - current_chunk.SetValue(17, count, Value(StringUtil::Join(encoding_string, ", "))); - - // index_page_offset, LogicalType::BIGINT - current_chunk.SetValue(18, count, Value::BIGINT(col_meta.index_page_offset)); - - // dictionary_page_offset, LogicalType::BIGINT - current_chunk.SetValue(19, count, Value::BIGINT(col_meta.dictionary_page_offset)); - - // data_page_offset, LogicalType::BIGINT - current_chunk.SetValue(20, count, Value::BIGINT(col_meta.data_page_offset)); - - // total_compressed_size, LogicalType::BIGINT - current_chunk.SetValue(21, count, Value::BIGINT(col_meta.total_compressed_size)); - - // total_uncompressed_size, LogicalType::BIGINT - current_chunk.SetValue(22, count, Value::BIGINT(col_meta.total_uncompressed_size)); - - count++; - if (count >= STANDARD_VECTOR_SIZE) { - current_chunk.SetCardinality(count); - collection.Append(current_chunk); - - count = 0; - current_chunk.Reset(); - } - } - } - current_chunk.SetCardinality(count); - collection.Append(current_chunk); -} - -void ParquetMetaDataOperatorData::BindSchema(vector &return_types, vector &names) { - names.emplace_back("file_name"); - return_types.emplace_back(LogicalType::VARCHAR); - - names.emplace_back("name"); - return_types.emplace_back(LogicalType::VARCHAR); - - names.emplace_back("type"); - return_types.emplace_back(LogicalType::VARCHAR); - - names.emplace_back("type_length"); - return_types.emplace_back(LogicalType::VARCHAR); - - names.emplace_back("repetition_type"); - return_types.emplace_back(LogicalType::VARCHAR); - - names.emplace_back("num_children"); - return_types.emplace_back(LogicalType::BIGINT); - - names.emplace_back("converted_type"); - return_types.emplace_back(LogicalType::VARCHAR); - - names.emplace_back("scale"); - return_types.emplace_back(LogicalType::BIGINT); - - names.emplace_back("precision"); - return_types.emplace_back(LogicalType::BIGINT); - - names.emplace_back("field_id"); - return_types.emplace_back(LogicalType::BIGINT); - - names.emplace_back("logical_type"); - return_types.emplace_back(LogicalType::VARCHAR); -} - -Value ParquetLogicalTypeToString(const duckdb_parquet::format::LogicalType &type) { - - if (type.__isset.STRING) { - return Value(PrintParquetElementToString(type.STRING)); - } - if (type.__isset.MAP) { - return Value(PrintParquetElementToString(type.MAP)); - } - if (type.__isset.LIST) { - return Value(PrintParquetElementToString(type.LIST)); - } - if (type.__isset.ENUM) { - return Value(PrintParquetElementToString(type.ENUM)); - } - if (type.__isset.DECIMAL) { - return Value(PrintParquetElementToString(type.DECIMAL)); - } - if (type.__isset.DATE) { - return Value(PrintParquetElementToString(type.DATE)); - } - if (type.__isset.TIME) { - return Value(PrintParquetElementToString(type.TIME)); - } - if (type.__isset.TIMESTAMP) { - return Value(PrintParquetElementToString(type.TIMESTAMP)); - } - if (type.__isset.INTEGER) { - return Value(PrintParquetElementToString(type.INTEGER)); - } - if (type.__isset.UNKNOWN) { - return Value(PrintParquetElementToString(type.UNKNOWN)); - } - if (type.__isset.JSON) { - return Value(PrintParquetElementToString(type.JSON)); - } - if (type.__isset.BSON) { - return Value(PrintParquetElementToString(type.BSON)); - } - if (type.__isset.UUID) { - return Value(PrintParquetElementToString(type.UUID)); - } - return Value(); -} - -void ParquetMetaDataOperatorData::LoadSchemaData(ClientContext &context, const vector &return_types, - const string &file_path) { - collection.Reset(); - ParquetOptions parquet_options(context); - auto reader = make_unique(context, file_path, parquet_options); - idx_t count = 0; - DataChunk current_chunk; - current_chunk.Initialize(context, return_types); - auto meta_data = reader->GetFileMetadata(); - for (idx_t col_idx = 0; col_idx < meta_data->schema.size(); col_idx++) { - auto &column = meta_data->schema[col_idx]; - - // file_name, LogicalType::VARCHAR - current_chunk.SetValue(0, count, file_path); - - // name, LogicalType::VARCHAR - current_chunk.SetValue(1, count, column.name); - - // type, LogicalType::VARCHAR - current_chunk.SetValue(2, count, ConvertParquetElementToString(column.type)); - - // type_length, LogicalType::VARCHAR - current_chunk.SetValue(3, count, Value::INTEGER(column.type_length)); - - // repetition_type, LogicalType::VARCHAR - current_chunk.SetValue(4, count, ConvertParquetElementToString(column.repetition_type)); - - // num_children, LogicalType::BIGINT - current_chunk.SetValue(5, count, Value::BIGINT(column.num_children)); - - // converted_type, LogicalType::VARCHAR - current_chunk.SetValue(6, count, ConvertParquetElementToString(column.converted_type)); - - // scale, LogicalType::BIGINT - current_chunk.SetValue(7, count, Value::BIGINT(column.scale)); - - // precision, LogicalType::BIGINT - current_chunk.SetValue(8, count, Value::BIGINT(column.precision)); - - // field_id, LogicalType::BIGINT - current_chunk.SetValue(9, count, Value::BIGINT(column.field_id)); - - // logical_type, LogicalType::VARCHAR - current_chunk.SetValue(10, count, ParquetLogicalTypeToString(column.logicalType)); - - count++; - if (count >= STANDARD_VECTOR_SIZE) { - current_chunk.SetCardinality(count); - collection.Append(current_chunk); - - count = 0; - current_chunk.Reset(); - } - } - current_chunk.SetCardinality(count); - collection.Append(current_chunk); -} - -template -unique_ptr ParquetMetaDataBind(ClientContext &context, TableFunctionBindInput &input, - vector &return_types, vector &names) { - auto &config = DBConfig::GetConfig(context); - if (!config.enable_external_access) { - throw PermissionException("Scanning Parquet files is disabled through configuration"); - } - if (SCHEMA) { - ParquetMetaDataOperatorData::BindSchema(return_types, names); - } else { - ParquetMetaDataOperatorData::BindMetaData(return_types, names); - } - - auto file_name = input.inputs[0].GetValue(); - auto result = make_unique(); - - FileSystem &fs = FileSystem::GetFileSystem(context); - result->return_types = return_types; - result->files = fs.Glob(file_name, context); - if (result->files.empty()) { - throw IOException("No files found that match the pattern \"%s\"", file_name); - } - return move(result); -} - -template -unique_ptr ParquetMetaDataInit(ClientContext &context, TableFunctionInitInput &input) { - auto &bind_data = (ParquetMetaDataBindData &)*input.bind_data; - D_ASSERT(!bind_data.files.empty()); - - auto result = make_unique(Allocator::Get(context)); - if (SCHEMA) { - result->LoadSchemaData(context, bind_data.return_types, bind_data.files[0]); - } else { - result->LoadFileMetaData(context, bind_data.return_types, bind_data.files[0]); - } - result->file_index = 0; - return move(result); -} - -template -void ParquetMetaDataImplementation(ClientContext &context, TableFunctionInput &data_p, DataChunk &output) { - auto &data = (ParquetMetaDataOperatorData &)*data_p.global_state; - auto &bind_data = (ParquetMetaDataBindData &)*data_p.bind_data; - while (true) { - auto chunk = data.collection.Fetch(); - if (!chunk) { - if (data.file_index + 1 < bind_data.files.size()) { - // load the metadata for the next file - data.file_index++; - if (SCHEMA) { - data.LoadSchemaData(context, bind_data.return_types, bind_data.files[data.file_index]); - } else { - data.LoadFileMetaData(context, bind_data.return_types, bind_data.files[data.file_index]); - } - continue; - } else { - // no files remaining: done - return; - } - } - output.Move(*chunk); - if (output.size() != 0) { - return; - } - } -} - -ParquetMetaDataFunction::ParquetMetaDataFunction() - : TableFunction("parquet_metadata", {LogicalType::VARCHAR}, ParquetMetaDataImplementation, - ParquetMetaDataBind, ParquetMetaDataInit) { -} - -ParquetSchemaFunction::ParquetSchemaFunction() - : TableFunction("parquet_schema", {LogicalType::VARCHAR}, ParquetMetaDataImplementation, - ParquetMetaDataBind, ParquetMetaDataInit) { -} - -} // namespace duckdb - - - - - - - - - - - - - - - - - -#include "duckdb.hpp" -#ifndef DUCKDB_AMALGAMATION -#include "duckdb/planner/table_filter.hpp" -#include "duckdb/planner/filter/constant_filter.hpp" -#include "duckdb/planner/filter/null_filter.hpp" -#include "duckdb/planner/filter/conjunction_filter.hpp" -#include "duckdb/common/file_system.hpp" -#include "duckdb/common/string_util.hpp" -#include "duckdb/common/types/date.hpp" -#include "duckdb/common/pair.hpp" -#include "duckdb/common/vector_operations/vector_operations.hpp" - -#include "duckdb/storage/object_cache.hpp" -#endif - -#include -#include -#include -#include - -namespace duckdb { - -using duckdb_parquet::format::ColumnChunk; -using duckdb_parquet::format::ConvertedType; -using duckdb_parquet::format::FieldRepetitionType; -using duckdb_parquet::format::FileMetaData; -using ParquetRowGroup = duckdb_parquet::format::RowGroup; -using duckdb_parquet::format::SchemaElement; -using duckdb_parquet::format::Statistics; -using duckdb_parquet::format::Type; - -static unique_ptr -CreateThriftProtocol(Allocator &allocator, FileHandle &file_handle, FileOpener &opener, bool prefetch_mode) { - auto transport = make_shared(allocator, file_handle, opener, prefetch_mode); - return make_unique>(move(transport)); -} - -static shared_ptr LoadMetadata(Allocator &allocator, FileHandle &file_handle, - FileOpener &opener) { - auto current_time = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now()); - - auto proto = CreateThriftProtocol(allocator, file_handle, opener, false); - auto &transport = ((ThriftFileTransport &)*proto->getTransport()); - auto file_size = transport.GetSize(); - if (file_size < 12) { - throw InvalidInputException("File '%s' too small to be a Parquet file", file_handle.path); - } - - ResizeableBuffer buf; - buf.resize(allocator, 8); - buf.zero(); - - transport.SetLocation(file_size - 8); - transport.read((uint8_t *)buf.ptr, 8); - - if (strncmp(buf.ptr + 4, "PAR1", 4) != 0) { - throw InvalidInputException("No magic bytes found at end of file '%s'", file_handle.path); - } - // read four-byte footer length from just before the end magic bytes - auto footer_len = *(uint32_t *)buf.ptr; - if (footer_len <= 0 || file_size < 12 + footer_len) { - throw InvalidInputException("Footer length error in file '%s'", file_handle.path); - } - auto metadata_pos = file_size - (footer_len + 8); - transport.SetLocation(metadata_pos); - transport.Prefetch(metadata_pos, footer_len); - - auto metadata = make_unique(); - metadata->read(proto.get()); - return make_shared(move(metadata), current_time); -} - -LogicalType ParquetReader::DeriveLogicalType(const SchemaElement &s_ele, bool binary_as_string) { - // inner node - D_ASSERT(s_ele.__isset.type && s_ele.num_children == 0); - if (s_ele.type == Type::FIXED_LEN_BYTE_ARRAY && !s_ele.__isset.type_length) { - throw IOException("FIXED_LEN_BYTE_ARRAY requires length to be set"); - } - if (s_ele.type == Type::FIXED_LEN_BYTE_ARRAY && s_ele.__isset.logicalType && s_ele.logicalType.__isset.UUID) { - return LogicalType::UUID; - } - if (s_ele.__isset.converted_type) { - switch (s_ele.converted_type) { - case ConvertedType::INT_8: - if (s_ele.type == Type::INT32) { - return LogicalType::TINYINT; - } else { - throw IOException("INT8 converted type can only be set for value of Type::INT32"); - } - case ConvertedType::INT_16: - if (s_ele.type == Type::INT32) { - return LogicalType::SMALLINT; - } else { - throw IOException("INT16 converted type can only be set for value of Type::INT32"); - } - case ConvertedType::INT_32: - if (s_ele.type == Type::INT32) { - return LogicalType::INTEGER; - } else { - throw IOException("INT32 converted type can only be set for value of Type::INT32"); - } - case ConvertedType::INT_64: - if (s_ele.type == Type::INT64) { - return LogicalType::BIGINT; - } else { - throw IOException("INT64 converted type can only be set for value of Type::INT32"); - } - case ConvertedType::UINT_8: - if (s_ele.type == Type::INT32) { - return LogicalType::UTINYINT; - } else { - throw IOException("UINT8 converted type can only be set for value of Type::INT32"); - } - case ConvertedType::UINT_16: - if (s_ele.type == Type::INT32) { - return LogicalType::USMALLINT; - } else { - throw IOException("UINT16 converted type can only be set for value of Type::INT32"); - } - case ConvertedType::UINT_32: - if (s_ele.type == Type::INT32) { - return LogicalType::UINTEGER; - } else { - throw IOException("UINT32 converted type can only be set for value of Type::INT32"); - } - case ConvertedType::UINT_64: - if (s_ele.type == Type::INT64) { - return LogicalType::UBIGINT; - } else { - throw IOException("UINT64 converted type can only be set for value of Type::INT64"); - } - case ConvertedType::DATE: - if (s_ele.type == Type::INT32) { - return LogicalType::DATE; - } else { - throw IOException("DATE converted type can only be set for value of Type::INT32"); - } - case ConvertedType::TIMESTAMP_MICROS: - case ConvertedType::TIMESTAMP_MILLIS: - if (s_ele.type == Type::INT64) { - return LogicalType::TIMESTAMP; - } else { - throw IOException("TIMESTAMP converted type can only be set for value of Type::INT64"); - } - case ConvertedType::DECIMAL: - if (!s_ele.__isset.precision || !s_ele.__isset.scale) { - throw IOException("DECIMAL requires a length and scale specifier!"); - } - switch (s_ele.type) { - case Type::BYTE_ARRAY: - case Type::FIXED_LEN_BYTE_ARRAY: - case Type::INT32: - case Type::INT64: - return LogicalType::DECIMAL(s_ele.precision, s_ele.scale); - default: - throw IOException( - "DECIMAL converted type can only be set for value of Type::(FIXED_LEN_)BYTE_ARRAY/INT32/INT64"); - } - case ConvertedType::UTF8: - case ConvertedType::ENUM: - switch (s_ele.type) { - case Type::BYTE_ARRAY: - case Type::FIXED_LEN_BYTE_ARRAY: - return LogicalType::VARCHAR; - default: - throw IOException("UTF8 converted type can only be set for Type::(FIXED_LEN_)BYTE_ARRAY"); - } - case ConvertedType::TIME_MILLIS: - case ConvertedType::TIME_MICROS: - if (s_ele.type == Type::INT64) { - return LogicalType::TIME; - } else { - throw IOException("TIME_MICROS converted type can only be set for value of Type::INT64"); - } - case ConvertedType::INTERVAL: - return LogicalType::INTERVAL; - case ConvertedType::MAP: - case ConvertedType::MAP_KEY_VALUE: - case ConvertedType::LIST: - case ConvertedType::JSON: - case ConvertedType::BSON: - default: - throw IOException("Unsupported converted type"); - } - } else { - // no converted type set - // use default type for each physical type - switch (s_ele.type) { - case Type::BOOLEAN: - return LogicalType::BOOLEAN; - case Type::INT32: - return LogicalType::INTEGER; - case Type::INT64: - return LogicalType::BIGINT; - case Type::INT96: // always a timestamp it would seem - return LogicalType::TIMESTAMP; - case Type::FLOAT: - return LogicalType::FLOAT; - case Type::DOUBLE: - return LogicalType::DOUBLE; - case Type::BYTE_ARRAY: - case Type::FIXED_LEN_BYTE_ARRAY: - if (binary_as_string) { - return LogicalType::VARCHAR; - } - return LogicalType::BLOB; - default: - return LogicalType::INVALID; - } - } -} - -LogicalType ParquetReader::DeriveLogicalType(const SchemaElement &s_ele) { - return DeriveLogicalType(s_ele, parquet_options.binary_as_string); -} - -unique_ptr ParquetReader::CreateReaderRecursive(const FileMetaData *file_meta_data, idx_t depth, - idx_t max_define, idx_t max_repeat, - idx_t &next_schema_idx, idx_t &next_file_idx) { - D_ASSERT(file_meta_data); - D_ASSERT(next_schema_idx < file_meta_data->schema.size()); - auto &s_ele = file_meta_data->schema[next_schema_idx]; - auto this_idx = next_schema_idx; - - if (s_ele.__isset.repetition_type) { - if (s_ele.repetition_type != FieldRepetitionType::REQUIRED) { - max_define++; - } - if (s_ele.repetition_type == FieldRepetitionType::REPEATED) { - max_repeat++; - } - } - - if (!s_ele.__isset.type) { // inner node - if (s_ele.num_children == 0) { - throw std::runtime_error("Node has no children but should"); - } - child_list_t child_types; - vector> child_readers; - - idx_t c_idx = 0; - while (c_idx < (idx_t)s_ele.num_children) { - next_schema_idx++; - - auto &child_ele = file_meta_data->schema[next_schema_idx]; - - auto child_reader = CreateReaderRecursive(file_meta_data, depth + 1, max_define, max_repeat, - next_schema_idx, next_file_idx); - child_types.push_back(make_pair(child_ele.name, child_reader->Type())); - child_readers.push_back(move(child_reader)); - - c_idx++; - } - D_ASSERT(!child_types.empty()); - unique_ptr result; - LogicalType result_type; - - bool is_repeated = s_ele.repetition_type == FieldRepetitionType::REPEATED; - bool is_list = s_ele.__isset.converted_type && s_ele.converted_type == ConvertedType::LIST; - bool is_map = s_ele.__isset.converted_type && s_ele.converted_type == ConvertedType::MAP; - bool is_map_kv = s_ele.__isset.converted_type && s_ele.converted_type == ConvertedType::MAP_KEY_VALUE; - if (!is_map_kv && this_idx > 0) { - // check if the parent node of this is a map - auto &p_ele = file_meta_data->schema[this_idx - 1]; - bool parent_is_map = p_ele.__isset.converted_type && p_ele.converted_type == ConvertedType::MAP; - bool parent_has_children = p_ele.__isset.num_children && p_ele.num_children == 1; - is_map_kv = parent_is_map && parent_has_children; - } - - if (is_map_kv) { - if (child_types.size() != 2) { - throw IOException("MAP_KEY_VALUE requires two children"); - } - if (!is_repeated) { - throw IOException("MAP_KEY_VALUE needs to be repeated"); - } - result_type = LogicalType::MAP(move(child_types[0].second), move(child_types[1].second)); - for (auto &child_reader : child_readers) { - auto child_type = LogicalType::LIST(child_reader->Type()); - child_reader = make_unique(*this, move(child_type), s_ele, this_idx, max_define, - max_repeat, move(child_reader)); - } - result = make_unique(*this, result_type, s_ele, this_idx, max_define - 1, - max_repeat - 1, move(child_readers)); - return result; - } - if (child_types.size() > 1 || (!is_list && !is_map && !is_repeated)) { - result_type = LogicalType::STRUCT(move(child_types)); - result = make_unique(*this, result_type, s_ele, this_idx, max_define, max_repeat, - move(child_readers)); - } else { - // if we have a struct with only a single type, pull up - result_type = child_types[0].second; - result = move(child_readers[0]); - } - if (is_repeated) { - result_type = LogicalType::LIST(result_type); - return make_unique(*this, result_type, s_ele, this_idx, max_define, max_repeat, - move(result)); - } - return result; - } else { // leaf node - if (s_ele.repetition_type == FieldRepetitionType::REPEATED) { - const auto derived_type = DeriveLogicalType(s_ele); - auto list_type = LogicalType::LIST(derived_type); - - auto element_reader = - ColumnReader::CreateReader(*this, derived_type, s_ele, next_file_idx++, max_define, max_repeat); - - return make_unique(*this, list_type, s_ele, this_idx, max_define, max_repeat, - move(element_reader)); - } - - // TODO check return value of derive type or should we only do this on read() - return ColumnReader::CreateReader(*this, DeriveLogicalType(s_ele), s_ele, next_file_idx++, max_define, - max_repeat); - } -} - -// TODO we don't need readers for columns we are not going to read ay -unique_ptr ParquetReader::CreateReader(const duckdb_parquet::format::FileMetaData *file_meta_data) { - idx_t next_schema_idx = 0; - idx_t next_file_idx = 0; - - auto ret = CreateReaderRecursive(file_meta_data, 0, 0, 0, next_schema_idx, next_file_idx); - D_ASSERT(next_schema_idx == file_meta_data->schema.size() - 1); - D_ASSERT(file_meta_data->row_groups.empty() || next_file_idx == file_meta_data->row_groups[0].columns.size()); - - // add casts if required - for (auto &entry : cast_map) { - auto column_idx = entry.first; - auto &expected_type = entry.second; - - auto &root_struct_reader = (StructColumnReader &)*ret; - auto child_reader = move(root_struct_reader.child_readers[column_idx]); - auto cast_reader = make_unique(move(child_reader), expected_type); - root_struct_reader.child_readers[column_idx] = move(cast_reader); - } - return ret; -} - -void ParquetReader::InitializeSchema(const vector &expected_names, const vector &expected_types, - const vector &column_ids, const string &initial_filename_p) { - D_ASSERT(expected_names.size() == expected_types.size() || (expected_names.empty() && column_ids.empty())); - auto file_meta_data = GetFileMetadata(); - - if (file_meta_data->__isset.encryption_algorithm) { - throw FormatException("Encrypted Parquet files are not supported"); - } - // check if we like this schema - if (file_meta_data->schema.size() < 2) { - throw FormatException("Need at least one non-root column in the file"); - } - - bool has_expected_names = !expected_names.empty(); - bool has_expected_types = !expected_types.empty(); - auto root_reader = CreateReader(file_meta_data); - - auto &root_type = root_reader->Type(); - auto &child_types = StructType::GetChildTypes(root_type); - D_ASSERT(root_type.id() == LogicalTypeId::STRUCT); - for (auto &type_pair : child_types) { - names.push_back(type_pair.first); - return_types.push_back(type_pair.second); - } - D_ASSERT(!names.empty()); - D_ASSERT(!return_types.empty()); - if (!has_expected_types) { - return; - } - if (!has_expected_names && has_expected_types) { - // we ONLY have expected types, but no expected names - // in this case we need all types to match in-order - if (return_types.size() != expected_types.size()) { - throw FormatException("column count mismatch: expected %d columns but found %d", expected_types.size(), - return_types.size()); - } - for (idx_t col_idx = 0; col_idx < return_types.size(); col_idx++) { - if (return_types[col_idx] == expected_types[col_idx]) { - continue; - } - // type mismatch: have to add a cast - cast_map[col_idx] = expected_types[col_idx]; - } - return; - } - D_ASSERT(column_ids.size() > 0); - // we have expected types: create a map of name -> column index - unordered_map name_map; - for (idx_t col_idx = 0; col_idx < names.size(); col_idx++) { - name_map[names[col_idx]] = col_idx; - } - // now for each of the expected names, look it up in the name map and fill the column_id_map - D_ASSERT(column_id_map.empty()); - for (idx_t i = 0; i < column_ids.size(); i++) { - if (column_ids[i] == COLUMN_IDENTIFIER_ROW_ID) { - continue; - } - auto &expected_name = expected_names[column_ids[i]]; - auto &expected_type = expected_types[column_ids[i]]; - auto entry = name_map.find(expected_name); - if (entry == name_map.end()) { - throw FormatException("schema mismatch in Parquet glob: column \"%s\" was read from the original file " - "\"%s\", but could not be found in file \"%s\"", - expected_name, initial_filename_p, file_name); - } - auto column_idx = entry->second; - column_id_map.push_back(column_idx); - if (expected_type != return_types[column_idx]) { - // type mismatch: have to add a cast - cast_map[column_idx] = expected_type; - } - } -} - -ParquetOptions::ParquetOptions(ClientContext &context) { - Value binary_as_string_val; - if (context.TryGetCurrentSetting("binary_as_string", binary_as_string_val)) { - binary_as_string = binary_as_string_val.GetValue(); - } -} - -ParquetReader::ParquetReader(Allocator &allocator_p, unique_ptr file_handle_p, - const vector &expected_types_p, const string &initial_filename_p) - : allocator(allocator_p) { - file_name = file_handle_p->path; - file_handle = move(file_handle_p); - metadata = LoadMetadata(allocator, *file_handle, *file_opener); - vector expected_names; - vector expected_column_ids; - InitializeSchema(expected_names, expected_types_p, expected_column_ids, initial_filename_p); -} - -ParquetReader::ParquetReader(ClientContext &context_p, string file_name_p, const vector &expected_names, - const vector &expected_types_p, const vector &column_ids, - ParquetOptions parquet_options_p, const string &initial_filename_p) - : allocator(BufferAllocator::Get(context_p)), file_opener(FileSystem::GetFileOpener(context_p)), - parquet_options(parquet_options_p) { - auto &fs = FileSystem::GetFileSystem(context_p); - file_name = move(file_name_p); - file_handle = fs.OpenFile(file_name, FileFlags::FILE_FLAGS_READ, FileSystem::DEFAULT_LOCK, - FileSystem::DEFAULT_COMPRESSION, file_opener); - if (!file_handle->CanSeek()) { - throw NotImplementedException( - "Reading parquet files from a FIFO stream is not supported and cannot be efficiently supported since " - "metadata is located at the end of the file. Write the stream to disk first and read from there instead."); - } - // If object cached is disabled - // or if this file has cached metadata - // or if the cached version already expired - auto last_modify_time = fs.GetLastModifiedTime(*file_handle); - if (!ObjectCache::ObjectCacheEnabled(context_p)) { - metadata = LoadMetadata(allocator, *file_handle, *file_opener); - } else { - metadata = ObjectCache::GetObjectCache(context_p).Get(file_name); - if (!metadata || (last_modify_time + 10 >= metadata->read_time)) { - metadata = LoadMetadata(allocator, *file_handle, *file_opener); - ObjectCache::GetObjectCache(context_p).Put(file_name, metadata); - } - } - InitializeSchema(expected_names, expected_types_p, column_ids, initial_filename_p); -} - -ParquetReader::~ParquetReader() { -} - -const FileMetaData *ParquetReader::GetFileMetadata() { - D_ASSERT(metadata); - D_ASSERT(metadata->metadata); - return metadata->metadata.get(); -} - -// TODO also somewhat ugly, perhaps this can be moved to the column reader too -unique_ptr ParquetReader::ReadStatistics(ParquetReader &reader, LogicalType &type, - column_t file_col_idx, const FileMetaData *file_meta_data) { - unique_ptr column_stats; - auto root_reader = reader.CreateReader(file_meta_data); - auto column_reader = ((StructColumnReader *)root_reader.get())->GetChildReader(file_col_idx); - - for (auto &row_group : file_meta_data->row_groups) { - auto chunk_stats = column_reader->Stats(row_group.columns); - if (!chunk_stats) { - return nullptr; - } - if (!column_stats) { - column_stats = move(chunk_stats); - } else { - column_stats->Merge(*chunk_stats); - } - } - return column_stats; -} - -const ParquetRowGroup &ParquetReader::GetGroup(ParquetReaderScanState &state) { - auto file_meta_data = GetFileMetadata(); - D_ASSERT(state.current_group >= 0 && (idx_t)state.current_group < state.group_idx_list.size()); - D_ASSERT(state.group_idx_list[state.current_group] >= 0 && - state.group_idx_list[state.current_group] < file_meta_data->row_groups.size()); - return file_meta_data->row_groups[state.group_idx_list[state.current_group]]; -} - -uint64_t ParquetReader::GetGroupCompressedSize(ParquetReaderScanState &state) { - auto &group = GetGroup(state); - auto total_compressed_size = group.total_compressed_size; - - idx_t calc_compressed_size = 0; - - // If the global total_compressed_size is not set, we can still calculate it - if (group.total_compressed_size == 0) { - for (auto &column_chunk : group.columns) { - calc_compressed_size += column_chunk.meta_data.total_compressed_size; - } - } - - if (total_compressed_size != 0 && calc_compressed_size != 0 && - (idx_t)total_compressed_size != calc_compressed_size) { - throw std::runtime_error("mismatch between calculated compressed size and reported compressed size"); - } - - return total_compressed_size ? total_compressed_size : calc_compressed_size; -} - -uint64_t ParquetReader::GetGroupSpan(ParquetReaderScanState &state) { - auto &group = GetGroup(state); - idx_t min_offset = NumericLimits::Maximum(); - idx_t max_offset = NumericLimits::Minimum(); - - for (auto &column_chunk : group.columns) { - - // Set the min offset - idx_t current_min_offset = NumericLimits::Maximum(); - if (column_chunk.meta_data.__isset.dictionary_page_offset) { - current_min_offset = MinValue(current_min_offset, column_chunk.meta_data.dictionary_page_offset); - } - if (column_chunk.meta_data.__isset.index_page_offset) { - current_min_offset = MinValue(current_min_offset, column_chunk.meta_data.index_page_offset); - } - current_min_offset = MinValue(current_min_offset, column_chunk.meta_data.data_page_offset); - min_offset = MinValue(current_min_offset, min_offset); - max_offset = MaxValue(max_offset, column_chunk.meta_data.total_compressed_size + current_min_offset); - } - - return max_offset - min_offset; -} - -idx_t ParquetReader::GetGroupOffset(ParquetReaderScanState &state) { - auto &group = GetGroup(state); - idx_t min_offset = NumericLimits::Maximum(); - - for (auto &column_chunk : group.columns) { - if (column_chunk.meta_data.__isset.dictionary_page_offset) { - min_offset = MinValue(min_offset, column_chunk.meta_data.dictionary_page_offset); - } - if (column_chunk.meta_data.__isset.index_page_offset) { - min_offset = MinValue(min_offset, column_chunk.meta_data.index_page_offset); - } - min_offset = MinValue(min_offset, column_chunk.meta_data.data_page_offset); - } - - return min_offset; -} - -void ParquetReader::PrepareRowGroupBuffer(ParquetReaderScanState &state, idx_t out_col_idx) { - auto &group = GetGroup(state); - - auto column_reader = ((StructColumnReader *)state.root_reader.get())->GetChildReader(state.column_ids[out_col_idx]); - - // TODO move this to columnreader too - if (state.filters) { - auto stats = column_reader->Stats(group.columns); - // filters contain output chunk index, not file col idx! - auto filter_entry = state.filters->filters.find(out_col_idx); - if (stats && filter_entry != state.filters->filters.end()) { - bool skip_chunk = false; - auto &filter = *filter_entry->second; - auto prune_result = filter.CheckStatistics(*stats); - if (prune_result == FilterPropagateResult::FILTER_ALWAYS_FALSE) { - skip_chunk = true; - } - if (skip_chunk) { - // this effectively will skip this chunk - state.group_offset = group.num_rows; - return; - } - } - } - - state.root_reader->InitializeRead(group.columns, *state.thrift_file_proto); -} - -idx_t ParquetReader::NumRows() { - return GetFileMetadata()->num_rows; -} - -idx_t ParquetReader::NumRowGroups() { - return GetFileMetadata()->row_groups.size(); -} - -void ParquetReader::InitializeScan(ParquetReaderScanState &state, vector column_ids, - vector groups_to_read, TableFilterSet *filters) { - state.current_group = -1; - state.finished = false; - state.column_ids = column_id_map.empty() ? move(column_ids) : column_id_map; - state.group_offset = 0; - state.group_idx_list = move(groups_to_read); - state.filters = filters; - state.sel.Initialize(STANDARD_VECTOR_SIZE); - - if (!state.file_handle || state.file_handle->path != file_handle->path) { - auto flags = FileFlags::FILE_FLAGS_READ; - - if (!file_handle->OnDiskFile() && file_handle->CanSeek()) { - state.prefetch_mode = true; - flags |= FileFlags::FILE_FLAGS_DIRECT_IO; - } else { - state.prefetch_mode = false; - } - - state.file_handle = file_handle->file_system.OpenFile(file_handle->path, flags, FileSystem::DEFAULT_LOCK, - FileSystem::DEFAULT_COMPRESSION, file_opener); - } - - state.thrift_file_proto = CreateThriftProtocol(allocator, *state.file_handle, *file_opener, state.prefetch_mode); - state.root_reader = CreateReader(GetFileMetadata()); - - state.define_buf.resize(allocator, STANDARD_VECTOR_SIZE); - state.repeat_buf.resize(allocator, STANDARD_VECTOR_SIZE); -} - -void FilterIsNull(Vector &v, parquet_filter_t &filter_mask, idx_t count) { - auto &mask = FlatVector::Validity(v); - if (mask.AllValid()) { - filter_mask.reset(); - } else { - for (idx_t i = 0; i < count; i++) { - filter_mask[i] = filter_mask[i] && !mask.RowIsValid(i); - } - } -} - -void FilterIsNotNull(Vector &v, parquet_filter_t &filter_mask, idx_t count) { - auto &mask = FlatVector::Validity(v); - if (!mask.AllValid()) { - for (idx_t i = 0; i < count; i++) { - filter_mask[i] = filter_mask[i] && mask.RowIsValid(i); - } - } -} - -template -void TemplatedFilterOperation(Vector &v, T constant, parquet_filter_t &filter_mask, idx_t count) { - D_ASSERT(v.GetVectorType() == VectorType::FLAT_VECTOR); // we just created the damn thing it better be - - auto v_ptr = FlatVector::GetData(v); - auto &mask = FlatVector::Validity(v); - - if (!mask.AllValid()) { - for (idx_t i = 0; i < count; i++) { - if (mask.RowIsValid(i)) { - filter_mask[i] = filter_mask[i] && OP::Operation(v_ptr[i], constant); - } - } - } else { - for (idx_t i = 0; i < count; i++) { - filter_mask[i] = filter_mask[i] && OP::Operation(v_ptr[i], constant); - } - } -} - -template -void TemplatedFilterOperation(Vector &v, const Value &constant, parquet_filter_t &filter_mask, idx_t count) { - TemplatedFilterOperation(v, constant.template GetValueUnsafe(), filter_mask, count); -} - -template -static void FilterOperationSwitch(Vector &v, Value &constant, parquet_filter_t &filter_mask, idx_t count) { - if (filter_mask.none() || count == 0) { - return; - } - switch (v.GetType().InternalType()) { - case PhysicalType::BOOL: - TemplatedFilterOperation(v, constant, filter_mask, count); - break; - case PhysicalType::UINT8: - TemplatedFilterOperation(v, constant, filter_mask, count); - break; - case PhysicalType::UINT16: - TemplatedFilterOperation(v, constant, filter_mask, count); - break; - case PhysicalType::UINT32: - TemplatedFilterOperation(v, constant, filter_mask, count); - break; - case PhysicalType::UINT64: - TemplatedFilterOperation(v, constant, filter_mask, count); - break; - case PhysicalType::INT8: - TemplatedFilterOperation(v, constant, filter_mask, count); - break; - case PhysicalType::INT16: - TemplatedFilterOperation(v, constant, filter_mask, count); - break; - case PhysicalType::INT32: - TemplatedFilterOperation(v, constant, filter_mask, count); - break; - case PhysicalType::INT64: - TemplatedFilterOperation(v, constant, filter_mask, count); - break; - case PhysicalType::INT128: - TemplatedFilterOperation(v, constant, filter_mask, count); - break; - case PhysicalType::FLOAT: - TemplatedFilterOperation(v, constant, filter_mask, count); - break; - case PhysicalType::DOUBLE: - TemplatedFilterOperation(v, constant, filter_mask, count); - break; - case PhysicalType::VARCHAR: - TemplatedFilterOperation(v, constant, filter_mask, count); - break; - default: - throw NotImplementedException("Unsupported type for filter %s", v.ToString()); - } -} - -static void ApplyFilter(Vector &v, TableFilter &filter, parquet_filter_t &filter_mask, idx_t count) { - switch (filter.filter_type) { - case TableFilterType::CONJUNCTION_AND: { - auto &conjunction = (ConjunctionAndFilter &)filter; - for (auto &child_filter : conjunction.child_filters) { - ApplyFilter(v, *child_filter, filter_mask, count); - } - break; - } - case TableFilterType::CONJUNCTION_OR: { - auto &conjunction = (ConjunctionOrFilter &)filter; - parquet_filter_t or_mask; - for (auto &child_filter : conjunction.child_filters) { - parquet_filter_t child_mask = filter_mask; - ApplyFilter(v, *child_filter, child_mask, count); - or_mask |= child_mask; - } - filter_mask &= or_mask; - break; - } - case TableFilterType::CONSTANT_COMPARISON: { - auto &constant_filter = (ConstantFilter &)filter; - switch (constant_filter.comparison_type) { - case ExpressionType::COMPARE_EQUAL: - FilterOperationSwitch(v, constant_filter.constant, filter_mask, count); - break; - case ExpressionType::COMPARE_LESSTHAN: - FilterOperationSwitch(v, constant_filter.constant, filter_mask, count); - break; - case ExpressionType::COMPARE_LESSTHANOREQUALTO: - FilterOperationSwitch(v, constant_filter.constant, filter_mask, count); - break; - case ExpressionType::COMPARE_GREATERTHAN: - FilterOperationSwitch(v, constant_filter.constant, filter_mask, count); - break; - case ExpressionType::COMPARE_GREATERTHANOREQUALTO: - FilterOperationSwitch(v, constant_filter.constant, filter_mask, count); - break; - default: - D_ASSERT(0); - } - break; - } - case TableFilterType::IS_NOT_NULL: - FilterIsNotNull(v, filter_mask, count); - break; - case TableFilterType::IS_NULL: - FilterIsNull(v, filter_mask, count); - break; - default: - D_ASSERT(0); - break; - } -} - -void ParquetReader::Scan(ParquetReaderScanState &state, DataChunk &result) { - while (ScanInternal(state, result)) { - if (result.size() > 0) { - break; - } - result.Reset(); - } -} - -bool ParquetReader::ScanInternal(ParquetReaderScanState &state, DataChunk &result) { - if (state.finished) { - return false; - } - - // see if we have to switch to the next row group in the parquet file - if (state.current_group < 0 || (int64_t)state.group_offset >= GetGroup(state).num_rows) { - state.current_group++; - state.group_offset = 0; - - auto &trans = (ThriftFileTransport &)*state.thrift_file_proto->getTransport(); - trans.ClearPrefetch(); - state.current_group_prefetched = false; - - if ((idx_t)state.current_group == state.group_idx_list.size()) { - state.finished = true; - return false; - } - - uint64_t to_scan_compressed_bytes = 0; - for (idx_t out_col_idx = 0; out_col_idx < result.ColumnCount(); out_col_idx++) { - // this is a special case where we are not interested in the actual contents of the file - if (state.column_ids[out_col_idx] == COLUMN_IDENTIFIER_ROW_ID) { - continue; - } - - PrepareRowGroupBuffer(state, out_col_idx); - - auto file_col_idx = state.column_ids[out_col_idx]; - - auto root_reader = ((StructColumnReader *)state.root_reader.get()); - to_scan_compressed_bytes += root_reader->GetChildReader(file_col_idx)->TotalCompressedSize(); - } - - auto &group = GetGroup(state); - if (state.prefetch_mode && state.group_offset != (idx_t)group.num_rows) { - - uint64_t total_row_group_span = GetGroupSpan(state); - - double scan_percentage = (double)(to_scan_compressed_bytes) / total_row_group_span; - - if (to_scan_compressed_bytes > total_row_group_span) { - throw std::runtime_error( - "Malformed parquet file: sum of total compressed bytes of columns seems incorrect"); - } - - if (!state.filters && scan_percentage > ParquetReaderPrefetchConfig::WHOLE_GROUP_PREFETCH_MINIMUM_SCAN) { - // Prefetch the whole row group - if (!state.current_group_prefetched) { - auto total_compressed_size = GetGroupCompressedSize(state); - if (total_compressed_size > 0) { - trans.Prefetch(GetGroupOffset(state), total_row_group_span); - } - state.current_group_prefetched = true; - } - } else { - // lazy fetching is when all tuples in a column can be skipped. With lazy fetching the buffer is only - // fetched on the first read to that buffer. - bool lazy_fetch = state.filters; - - // Prefetch column-wise - for (idx_t out_col_idx = 0; out_col_idx < result.ColumnCount(); out_col_idx++) { - - if (state.column_ids[out_col_idx] == COLUMN_IDENTIFIER_ROW_ID) { - continue; - } - - auto file_col_idx = state.column_ids[out_col_idx]; - auto root_reader = ((StructColumnReader *)state.root_reader.get()); - - bool has_filter = - state.filters && state.filters->filters.find(out_col_idx) != state.filters->filters.end(); - root_reader->GetChildReader(file_col_idx)->RegisterPrefetch(trans, !(lazy_fetch && !has_filter)); - } - - trans.FinalizeRegistration(); - - if (!lazy_fetch) { - trans.PrefetchRegistered(); - } - } - } - return true; - } - - auto this_output_chunk_rows = MinValue(STANDARD_VECTOR_SIZE, GetGroup(state).num_rows - state.group_offset); - result.SetCardinality(this_output_chunk_rows); - - if (this_output_chunk_rows == 0) { - state.finished = true; - return false; // end of last group, we are done - } - - // we evaluate simple table filters directly in this scan so we can skip decoding column data that's never going to - // be relevant - parquet_filter_t filter_mask; - filter_mask.set(); - - // mask out unused part of bitset - for (idx_t i = this_output_chunk_rows; i < STANDARD_VECTOR_SIZE; i++) { - filter_mask.set(i, false); - } - - state.define_buf.zero(); - state.repeat_buf.zero(); - - auto define_ptr = (uint8_t *)state.define_buf.ptr; - auto repeat_ptr = (uint8_t *)state.repeat_buf.ptr; - - auto root_reader = ((StructColumnReader *)state.root_reader.get()); - - if (state.filters) { - vector need_to_read(result.ColumnCount(), true); - - // first load the columns that are used in filters - for (auto &filter_col : state.filters->filters) { - auto file_col_idx = state.column_ids[filter_col.first]; - // row_group skipping of columns that are never scanned. - if (filter_mask.none()) { // if no rows are left we can stop checking filters - break; - } - - root_reader->GetChildReader(file_col_idx) - ->Read(result.size(), filter_mask, define_ptr, repeat_ptr, result.data[filter_col.first]); - - need_to_read[filter_col.first] = false; - - ApplyFilter(result.data[filter_col.first], *filter_col.second, filter_mask, this_output_chunk_rows); - } - - // we still may have to read some cols - for (idx_t out_col_idx = 0; out_col_idx < result.ColumnCount(); out_col_idx++) { - if (!need_to_read[out_col_idx]) { - continue; - } - auto file_col_idx = state.column_ids[out_col_idx]; - - if (filter_mask.none()) { - root_reader->GetChildReader(file_col_idx)->Skip(result.size()); - continue; - } - // TODO handle ROWID here, too - root_reader->GetChildReader(file_col_idx) - ->Read(result.size(), filter_mask, define_ptr, repeat_ptr, result.data[out_col_idx]); - } - - idx_t sel_size = 0; - for (idx_t i = 0; i < this_output_chunk_rows; i++) { - if (filter_mask[i]) { - state.sel.set_index(sel_size++, i); - } - } - - result.Slice(state.sel, sel_size); - result.Verify(); - - } else { // #nofilter, just fricking load the data - for (idx_t out_col_idx = 0; out_col_idx < result.ColumnCount(); out_col_idx++) { - auto file_col_idx = state.column_ids[out_col_idx]; - - if (file_col_idx == COLUMN_IDENTIFIER_ROW_ID) { - Value constant_42 = Value::BIGINT(42); - result.data[out_col_idx].Reference(constant_42); - continue; - } - - // std::cout << "Reading nofilter for col " << - // root_reader->GetChildReader(file_col_idx)->Schema().name - //<< "\n"; - root_reader->GetChildReader(file_col_idx) - ->Read(result.size(), filter_mask, define_ptr, repeat_ptr, result.data[out_col_idx]); - } - } - - state.group_offset += this_output_chunk_rows; - return true; -} - -} // namespace duckdb - - - - -#include "duckdb.hpp" -#ifndef DUCKDB_AMALGAMATION -#include "duckdb/common/types/blob.hpp" -#include "duckdb/common/types/value.hpp" -#include "duckdb/storage/statistics/numeric_statistics.hpp" -#include "duckdb/storage/statistics/string_statistics.hpp" -#endif - -namespace duckdb { - -using duckdb_parquet::format::ConvertedType; -using duckdb_parquet::format::Type; - -static unique_ptr CreateNumericStats(const LogicalType &type, - const duckdb_parquet::format::SchemaElement &schema_ele, - const duckdb_parquet::format::Statistics &parquet_stats) { - auto stats = make_unique(type, StatisticsType::LOCAL_STATS); - - // for reasons unknown to science, Parquet defines *both* `min` and `min_value` as well as `max` and - // `max_value`. All are optional. such elegance. - if (parquet_stats.__isset.min) { - stats->min = ParquetStatisticsUtils::ConvertValue(type, schema_ele, parquet_stats.min).CastAs(type); - } else if (parquet_stats.__isset.min_value) { - stats->min = ParquetStatisticsUtils::ConvertValue(type, schema_ele, parquet_stats.min_value).CastAs(type); - } else { - stats->min = Value(type); - } - if (parquet_stats.__isset.max) { - stats->max = ParquetStatisticsUtils::ConvertValue(type, schema_ele, parquet_stats.max).CastAs(type); - } else if (parquet_stats.__isset.max_value) { - stats->max = ParquetStatisticsUtils::ConvertValue(type, schema_ele, parquet_stats.max_value).CastAs(type); - } else { - stats->max = Value(type); - } - return move(stats); -} - -Value ParquetStatisticsUtils::ConvertValue(const LogicalType &type, - const duckdb_parquet::format::SchemaElement &schema_ele, - const std::string &stats) { - if (stats.empty()) { - return Value(); - } - switch (type.id()) { - case LogicalTypeId::BOOLEAN: { - if (stats.size() != sizeof(bool)) { - throw InternalException("Incorrect stats size for type BOOLEAN"); - } - return Value::BOOLEAN(Load((data_ptr_t)stats.c_str())); - } - case LogicalTypeId::UTINYINT: - case LogicalTypeId::USMALLINT: - case LogicalTypeId::UINTEGER: - if (stats.size() != sizeof(uint32_t)) { - throw InternalException("Incorrect stats size for type UINTEGER"); - } - return Value::UINTEGER(Load((data_ptr_t)stats.c_str())); - case LogicalTypeId::UBIGINT: - if (stats.size() != sizeof(uint64_t)) { - throw InternalException("Incorrect stats size for type UBIGINT"); - } - return Value::UBIGINT(Load((data_ptr_t)stats.c_str())); - case LogicalTypeId::TINYINT: - case LogicalTypeId::SMALLINT: - case LogicalTypeId::INTEGER: - if (stats.size() != sizeof(int32_t)) { - throw InternalException("Incorrect stats size for type INTEGER"); - } - return Value::INTEGER(Load((data_ptr_t)stats.c_str())); - case LogicalTypeId::BIGINT: - if (stats.size() != sizeof(int64_t)) { - throw InternalException("Incorrect stats size for type BIGINT"); - } - return Value::BIGINT(Load((data_ptr_t)stats.c_str())); - case LogicalTypeId::FLOAT: { - if (stats.size() != sizeof(float)) { - throw InternalException("Incorrect stats size for type FLOAT"); - } - auto val = Load((data_ptr_t)stats.c_str()); - if (!Value::FloatIsFinite(val)) { - return Value(); - } - return Value::FLOAT(val); - } - case LogicalTypeId::DOUBLE: { - if (stats.size() != sizeof(double)) { - throw InternalException("Incorrect stats size for type DOUBLE"); - } - auto val = Load((data_ptr_t)stats.c_str()); - if (!Value::DoubleIsFinite(val)) { - return Value(); - } - return Value::DOUBLE(val); - } - case LogicalTypeId::DECIMAL: { - auto width = DecimalType::GetWidth(type); - auto scale = DecimalType::GetScale(type); - switch (schema_ele.type) { - case Type::INT32: { - if (stats.size() != sizeof(int32_t)) { - throw InternalException("Incorrect stats size for type %s", type.ToString()); - } - return Value::DECIMAL(Load((data_ptr_t)stats.c_str()), width, scale); - } - case Type::INT64: { - if (stats.size() != sizeof(int64_t)) { - throw InternalException("Incorrect stats size for type %s", type.ToString()); - } - return Value::DECIMAL(Load((data_ptr_t)stats.c_str()), width, scale); - } - case Type::BYTE_ARRAY: - case Type::FIXED_LEN_BYTE_ARRAY: - switch (type.InternalType()) { - case PhysicalType::INT16: - return Value::DECIMAL( - ParquetDecimalUtils::ReadDecimalValue((const_data_ptr_t)stats.c_str(), stats.size()), - width, scale); - case PhysicalType::INT32: - return Value::DECIMAL( - ParquetDecimalUtils::ReadDecimalValue((const_data_ptr_t)stats.c_str(), stats.size()), - width, scale); - case PhysicalType::INT64: - return Value::DECIMAL( - ParquetDecimalUtils::ReadDecimalValue((const_data_ptr_t)stats.c_str(), stats.size()), - width, scale); - case PhysicalType::INT128: - return Value::DECIMAL( - ParquetDecimalUtils::ReadDecimalValue((const_data_ptr_t)stats.c_str(), stats.size()), - width, scale); - default: - throw InternalException("Unsupported internal type for decimal"); - } - default: - throw InternalException("Unsupported internal type for decimal?.."); - } - } - case LogicalType::VARCHAR: - case LogicalType::BLOB: - if (Value::StringIsValid(stats)) { - return Value(stats); - } else { - return Value(Blob::ToString(string_t(stats))); - } - case LogicalTypeId::DATE: - if (stats.size() != sizeof(int32_t)) { - throw InternalException("Incorrect stats size for type DATE"); - } - return Value::DATE(date_t(Load((data_ptr_t)stats.c_str()))); - case LogicalTypeId::TIME: - if (stats.size() != sizeof(int64_t)) { - throw InternalException("Incorrect stats size for type TIME"); - } - return Value::TIME(dtime_t(Load((data_ptr_t)stats.c_str()))); - case LogicalTypeId::TIMESTAMP: { - if (schema_ele.type == Type::INT96) { - if (stats.size() != sizeof(Int96)) { - throw InternalException("Incorrect stats size for type TIMESTAMP"); - } - return Value::TIMESTAMP(ImpalaTimestampToTimestamp(Load((data_ptr_t)stats.c_str()))); - } else { - D_ASSERT(schema_ele.type == Type::INT64); - if (stats.size() != sizeof(int64_t)) { - throw InternalException("Incorrect stats size for type TIMESTAMP"); - } - auto val = Load((data_ptr_t)stats.c_str()); - if (schema_ele.converted_type == duckdb_parquet::format::ConvertedType::TIMESTAMP_MILLIS) { - return Value::TIMESTAMPMS(timestamp_t(val)); - } else { - return Value::TIMESTAMP(timestamp_t(val)); - } - } - } - default: - throw InternalException("Unsupported type for stats %s", type.ToString()); - } -} - -unique_ptr ParquetStatisticsUtils::TransformColumnStatistics(const SchemaElement &s_ele, - const LogicalType &type, - const ColumnChunk &column_chunk) { - if (!column_chunk.__isset.meta_data || !column_chunk.meta_data.__isset.statistics) { - // no stats present for row group - return nullptr; - } - auto &parquet_stats = column_chunk.meta_data.statistics; - unique_ptr row_group_stats; - - switch (type.id()) { - case LogicalTypeId::UTINYINT: - case LogicalTypeId::USMALLINT: - case LogicalTypeId::UINTEGER: - case LogicalTypeId::UBIGINT: - case LogicalTypeId::TINYINT: - case LogicalTypeId::SMALLINT: - case LogicalTypeId::INTEGER: - case LogicalTypeId::BIGINT: - case LogicalTypeId::FLOAT: - case LogicalTypeId::DOUBLE: - case LogicalTypeId::DATE: - case LogicalTypeId::TIME: - case LogicalTypeId::TIMESTAMP: - case LogicalTypeId::TIMESTAMP_SEC: - case LogicalTypeId::TIMESTAMP_MS: - case LogicalTypeId::TIMESTAMP_NS: - case LogicalTypeId::DECIMAL: - row_group_stats = CreateNumericStats(type, s_ele, parquet_stats); - break; - case LogicalTypeId::VARCHAR: { - auto string_stats = make_unique(type, StatisticsType::LOCAL_STATS); - if (parquet_stats.__isset.min) { - string_stats->Update(parquet_stats.min); - } else if (parquet_stats.__isset.min_value) { - string_stats->Update(parquet_stats.min_value); - } else { - return nullptr; - } - if (parquet_stats.__isset.max) { - string_stats->Update(parquet_stats.max); - } else if (parquet_stats.__isset.max_value) { - string_stats->Update(parquet_stats.max_value); - } else { - return nullptr; - } - string_stats->has_unicode = true; // we dont know better - string_stats->max_string_length = NumericLimits::Maximum(); - row_group_stats = move(string_stats); - break; - } - default: - // no stats for you - break; - } // end of type switch - - // null count is generic - if (row_group_stats) { - if (column_chunk.meta_data.type == duckdb_parquet::format::Type::FLOAT || - column_chunk.meta_data.type == duckdb_parquet::format::Type::DOUBLE) { - // floats/doubles can have infinity, which can become NULL - row_group_stats->validity_stats = make_unique(true); - } else if (parquet_stats.__isset.null_count) { - row_group_stats->validity_stats = make_unique(parquet_stats.null_count != 0); - } else { - row_group_stats->validity_stats = make_unique(true); - } - } else { - // if stats are missing from any row group we know squat - return nullptr; - } - - return row_group_stats; -} - -} // namespace duckdb - - -#include "duckdb.hpp" -#ifndef DUCKDB_AMALGAMATION -#include "duckdb/common/types/date.hpp" -#include "duckdb/common/types/time.hpp" -#include "duckdb/common/types/timestamp.hpp" -#endif - -namespace duckdb { - -// surely they are joking -static constexpr int64_t JULIAN_TO_UNIX_EPOCH_DAYS = 2440588LL; -static constexpr int64_t MILLISECONDS_PER_DAY = 86400000LL; -static constexpr int64_t NANOSECONDS_PER_DAY = MILLISECONDS_PER_DAY * 1000LL * 1000LL; - -int64_t ImpalaTimestampToNanoseconds(const Int96 &impala_timestamp) { - int64_t days_since_epoch = impala_timestamp.value[2] - JULIAN_TO_UNIX_EPOCH_DAYS; - auto nanoseconds = Load((data_ptr_t)impala_timestamp.value); - return days_since_epoch * NANOSECONDS_PER_DAY + nanoseconds; -} - -timestamp_t ImpalaTimestampToTimestamp(const Int96 &raw_ts) { - auto impala_ns = ImpalaTimestampToNanoseconds(raw_ts); - return Timestamp::FromEpochNanoSeconds(impala_ns); -} - -Int96 TimestampToImpalaTimestamp(timestamp_t &ts) { - int32_t hour, min, sec, msec; - Time::Convert(Timestamp::GetTime(ts), hour, min, sec, msec); - uint64_t ms_since_midnight = hour * 60 * 60 * 1000 + min * 60 * 1000 + sec * 1000 + msec; - auto days_since_epoch = Date::Epoch(Timestamp::GetDate(ts)) / (24 * 60 * 60); - // first two uint32 in Int96 are nanoseconds since midnights - // last uint32 is number of days since year 4713 BC ("Julian date") - Int96 impala_ts; - Store(ms_since_midnight * 1000000, (data_ptr_t)impala_ts.value); - impala_ts.value[2] = days_since_epoch + JULIAN_TO_UNIX_EPOCH_DAYS; - return impala_ts; -} - -timestamp_t ParquetTimestampMicrosToTimestamp(const int64_t &raw_ts) { - return Timestamp::FromEpochMicroSeconds(raw_ts); -} -timestamp_t ParquetTimestampMsToTimestamp(const int64_t &raw_ts) { - return Timestamp::FromEpochMs(raw_ts); -} - -date_t ParquetIntToDate(const int32_t &raw_date) { - return date_t(raw_date); -} - -dtime_t ParquetIntToTime(const int64_t &raw_time) { - return dtime_t(raw_time); -} - -} // namespace duckdb - - - -#include "duckdb.hpp" -#ifndef DUCKDB_AMALGAMATION -#include "duckdb/function/table_function.hpp" -#include "duckdb/parser/parsed_data/create_table_function_info.hpp" -#include "duckdb/parser/parsed_data/create_copy_function_info.hpp" -#include "duckdb/main/client_context.hpp" -#include "duckdb/main/connection.hpp" -#include "duckdb/common/file_system.hpp" -#include "duckdb/common/string_util.hpp" -#include "duckdb/common/serializer/buffered_file_writer.hpp" -#endif - -namespace duckdb { - -using namespace duckdb_apache::thrift; // NOLINT -using namespace duckdb_apache::thrift::protocol; // NOLINT -using namespace duckdb_apache::thrift::transport; // NOLINT - -using duckdb_parquet::format::CompressionCodec; -using duckdb_parquet::format::ConvertedType; -using duckdb_parquet::format::Encoding; -using duckdb_parquet::format::FieldRepetitionType; -using duckdb_parquet::format::FileMetaData; -using duckdb_parquet::format::PageHeader; -using duckdb_parquet::format::PageType; -using ParquetRowGroup = duckdb_parquet::format::RowGroup; -using duckdb_parquet::format::Type; - -class MyTransport : public TTransport { -public: - explicit MyTransport(Serializer &serializer) : serializer(serializer) { - } - - bool isOpen() const override { - return true; - } - - void open() override { - } - - void close() override { - } - - void write_virt(const uint8_t *buf, uint32_t len) override { - serializer.WriteData((const_data_ptr_t)buf, len); - } - -private: - Serializer &serializer; -}; - -Type::type ParquetWriter::DuckDBTypeToParquetType(const LogicalType &duckdb_type) { - switch (duckdb_type.id()) { - case LogicalTypeId::BOOLEAN: - return Type::BOOLEAN; - case LogicalTypeId::TINYINT: - case LogicalTypeId::SMALLINT: - case LogicalTypeId::INTEGER: - case LogicalTypeId::DATE: - return Type::INT32; - case LogicalTypeId::BIGINT: - return Type::INT64; - case LogicalTypeId::FLOAT: - return Type::FLOAT; - case LogicalTypeId::DOUBLE: - case LogicalTypeId::HUGEINT: - return Type::DOUBLE; - case LogicalTypeId::ENUM: - case LogicalTypeId::VARCHAR: - case LogicalTypeId::JSON: - case LogicalTypeId::BLOB: - return Type::BYTE_ARRAY; - case LogicalTypeId::TIME: - case LogicalTypeId::TIME_TZ: - case LogicalTypeId::TIMESTAMP: - case LogicalTypeId::TIMESTAMP_TZ: - case LogicalTypeId::TIMESTAMP_MS: - case LogicalTypeId::TIMESTAMP_NS: - case LogicalTypeId::TIMESTAMP_SEC: - return Type::INT64; - case LogicalTypeId::UTINYINT: - case LogicalTypeId::USMALLINT: - case LogicalTypeId::UINTEGER: - return Type::INT32; - case LogicalTypeId::UBIGINT: - return Type::INT64; - case LogicalTypeId::INTERVAL: - case LogicalTypeId::UUID: - return Type::FIXED_LEN_BYTE_ARRAY; - case LogicalTypeId::DECIMAL: - switch (duckdb_type.InternalType()) { - case PhysicalType::INT16: - case PhysicalType::INT32: - return Type::INT32; - case PhysicalType::INT64: - return Type::INT64; - case PhysicalType::INT128: - return Type::FIXED_LEN_BYTE_ARRAY; - default: - throw InternalException("Unsupported internal decimal type"); - } - default: - throw NotImplementedException("Unimplemented type for Parquet \"%s\"", duckdb_type.ToString()); - } -} - -void ParquetWriter::SetSchemaProperties(const LogicalType &duckdb_type, - duckdb_parquet::format::SchemaElement &schema_ele) { - switch (duckdb_type.id()) { - case LogicalTypeId::TINYINT: - schema_ele.converted_type = ConvertedType::INT_8; - schema_ele.__isset.converted_type = true; - break; - case LogicalTypeId::SMALLINT: - schema_ele.converted_type = ConvertedType::INT_16; - schema_ele.__isset.converted_type = true; - break; - case LogicalTypeId::INTEGER: - schema_ele.converted_type = ConvertedType::INT_32; - schema_ele.__isset.converted_type = true; - break; - case LogicalTypeId::BIGINT: - schema_ele.converted_type = ConvertedType::INT_64; - schema_ele.__isset.converted_type = true; - break; - case LogicalTypeId::UTINYINT: - schema_ele.converted_type = ConvertedType::UINT_8; - schema_ele.__isset.converted_type = true; - break; - case LogicalTypeId::USMALLINT: - schema_ele.converted_type = ConvertedType::UINT_16; - schema_ele.__isset.converted_type = true; - break; - case LogicalTypeId::UINTEGER: - schema_ele.converted_type = ConvertedType::UINT_32; - schema_ele.__isset.converted_type = true; - break; - case LogicalTypeId::UBIGINT: - schema_ele.converted_type = ConvertedType::UINT_64; - schema_ele.__isset.converted_type = true; - break; - case LogicalTypeId::DATE: - schema_ele.converted_type = ConvertedType::DATE; - schema_ele.__isset.converted_type = true; - break; - case LogicalTypeId::TIME_TZ: - case LogicalTypeId::TIME: - schema_ele.converted_type = ConvertedType::TIME_MICROS; - schema_ele.__isset.converted_type = true; - schema_ele.logicalType.__isset.TIME = true; - schema_ele.logicalType.TIME.isAdjustedToUTC = (duckdb_type.id() == LogicalTypeId::TIME_TZ); - schema_ele.logicalType.TIME.unit.__isset.MICROS = true; - break; - case LogicalTypeId::TIMESTAMP_TZ: - case LogicalTypeId::TIMESTAMP: - case LogicalTypeId::TIMESTAMP_NS: - case LogicalTypeId::TIMESTAMP_SEC: - schema_ele.converted_type = ConvertedType::TIMESTAMP_MICROS; - schema_ele.__isset.converted_type = true; - schema_ele.__isset.logicalType = true; - schema_ele.logicalType.__isset.TIMESTAMP = true; - schema_ele.logicalType.TIMESTAMP.isAdjustedToUTC = (duckdb_type.id() == LogicalTypeId::TIMESTAMP_TZ); - schema_ele.logicalType.TIMESTAMP.unit.__isset.MICROS = true; - break; - case LogicalTypeId::TIMESTAMP_MS: - schema_ele.converted_type = ConvertedType::TIMESTAMP_MILLIS; - schema_ele.__isset.converted_type = true; - schema_ele.__isset.logicalType = true; - schema_ele.logicalType.__isset.TIMESTAMP = true; - schema_ele.logicalType.TIMESTAMP.isAdjustedToUTC = false; - schema_ele.logicalType.TIMESTAMP.unit.__isset.MILLIS = true; - break; - case LogicalTypeId::ENUM: - case LogicalTypeId::VARCHAR: - case LogicalTypeId::JSON: - schema_ele.converted_type = ConvertedType::UTF8; - schema_ele.__isset.converted_type = true; - break; - case LogicalTypeId::INTERVAL: - schema_ele.type_length = 12; - schema_ele.converted_type = ConvertedType::INTERVAL; - schema_ele.__isset.type_length = true; - schema_ele.__isset.converted_type = true; - break; - case LogicalTypeId::UUID: - schema_ele.type_length = 16; - schema_ele.__isset.type_length = true; - schema_ele.__isset.logicalType = true; - schema_ele.logicalType.__isset.UUID = true; - break; - case LogicalTypeId::DECIMAL: - schema_ele.converted_type = ConvertedType::DECIMAL; - schema_ele.precision = DecimalType::GetWidth(duckdb_type); - schema_ele.scale = DecimalType::GetScale(duckdb_type); - schema_ele.__isset.converted_type = true; - schema_ele.__isset.precision = true; - schema_ele.__isset.scale = true; - if (duckdb_type.InternalType() == PhysicalType::INT128) { - schema_ele.type_length = 16; - schema_ele.__isset.type_length = true; - } - schema_ele.__isset.logicalType = true; - schema_ele.logicalType.__isset.DECIMAL = true; - schema_ele.logicalType.DECIMAL.precision = schema_ele.precision; - schema_ele.logicalType.DECIMAL.scale = schema_ele.scale; - break; - default: - break; - } -} - -ParquetWriter::ParquetWriter(FileSystem &fs, string file_name_p, FileOpener *file_opener_p, vector types_p, - vector names_p, CompressionCodec::type codec) - : file_name(move(file_name_p)), sql_types(move(types_p)), column_names(move(names_p)), codec(codec) { - // initialize the file writer - writer = make_unique( - fs, file_name.c_str(), FileFlags::FILE_FLAGS_WRITE | FileFlags::FILE_FLAGS_FILE_CREATE_NEW, file_opener_p); - // parquet files start with the string "PAR1" - writer->WriteData((const_data_ptr_t) "PAR1", 4); - TCompactProtocolFactoryT tproto_factory; - protocol = tproto_factory.getProtocol(make_shared(*writer)); - - file_meta_data.num_rows = 0; - file_meta_data.version = 1; - - file_meta_data.__isset.created_by = true; - file_meta_data.created_by = "DuckDB"; - - file_meta_data.schema.resize(1); - - // populate root schema object - file_meta_data.schema[0].name = "duckdb_schema"; - file_meta_data.schema[0].num_children = sql_types.size(); - file_meta_data.schema[0].__isset.num_children = true; - file_meta_data.schema[0].repetition_type = duckdb_parquet::format::FieldRepetitionType::REQUIRED; - file_meta_data.schema[0].__isset.repetition_type = true; - - vector schema_path; - for (idx_t i = 0; i < sql_types.size(); i++) { - column_writers.push_back(ColumnWriter::CreateWriterRecursive(file_meta_data.schema, *this, sql_types[i], - column_names[i], schema_path)); - } -} - -void ParquetWriter::Flush(ChunkCollection &buffer) { - if (buffer.Count() == 0) { - return; - } - lock_guard glock(lock); - - // set up a new row group for this chunk collection - ParquetRowGroup row_group; - row_group.num_rows = buffer.Count(); - row_group.file_offset = writer->GetTotalWritten(); - row_group.__isset.file_offset = true; - - // iterate over each of the columns of the chunk collection and write them - auto &chunks = buffer.Chunks(); - D_ASSERT(buffer.ColumnCount() == column_writers.size()); - for (idx_t col_idx = 0; col_idx < buffer.ColumnCount(); col_idx++) { - auto write_state = column_writers[col_idx]->InitializeWriteState(row_group); - for (idx_t chunk_idx = 0; chunk_idx < chunks.size(); chunk_idx++) { - column_writers[col_idx]->Prepare(*write_state, nullptr, chunks[chunk_idx]->data[col_idx], - chunks[chunk_idx]->size()); - } - column_writers[col_idx]->BeginWrite(*write_state); - for (idx_t chunk_idx = 0; chunk_idx < chunks.size(); chunk_idx++) { - column_writers[col_idx]->Write(*write_state, chunks[chunk_idx]->data[col_idx], chunks[chunk_idx]->size()); - } - column_writers[col_idx]->FinalizeWrite(*write_state); - } - - // append the row group to the file meta data - file_meta_data.row_groups.push_back(row_group); - file_meta_data.num_rows += buffer.Count(); -} - -void ParquetWriter::Finalize() { - auto start_offset = writer->GetTotalWritten(); - file_meta_data.write(protocol.get()); - - writer->Write(writer->GetTotalWritten() - start_offset); - - // parquet files also end with the string "PAR1" - writer->WriteData((const_data_ptr_t) "PAR1", 4); - - // flush to disk - writer->Sync(); - writer.reset(); -} - -} // namespace duckdb - - - -namespace duckdb { - -struct ZstdStreamWrapper : public StreamWrapper { - ~ZstdStreamWrapper() override; - - CompressedFile *file = nullptr; - duckdb_zstd::ZSTD_DStream *zstd_stream_ptr = nullptr; - duckdb_zstd::ZSTD_CStream *zstd_compress_ptr = nullptr; - bool writing = false; - -public: - void Initialize(CompressedFile &file, bool write) override; - bool Read(StreamData &stream_data) override; - void Write(CompressedFile &file, StreamData &stream_data, data_ptr_t buffer, int64_t nr_bytes) override; - - void Close() override; - - void FlushStream(); -}; - -ZstdStreamWrapper::~ZstdStreamWrapper() { - if (Exception::UncaughtException()) { - return; - } - try { - Close(); - } catch (...) { - } -} - -void ZstdStreamWrapper::Initialize(CompressedFile &file, bool write) { - Close(); - this->file = &file; - this->writing = write; - if (write) { - zstd_compress_ptr = duckdb_zstd::ZSTD_createCStream(); - } else { - zstd_stream_ptr = duckdb_zstd::ZSTD_createDStream(); - } -} - -bool ZstdStreamWrapper::Read(StreamData &sd) { - D_ASSERT(!writing); - - duckdb_zstd::ZSTD_inBuffer in_buffer; - duckdb_zstd::ZSTD_outBuffer out_buffer; - - in_buffer.src = sd.in_buff_start; - in_buffer.size = sd.in_buff_end - sd.in_buff_start; - in_buffer.pos = 0; - - out_buffer.dst = sd.out_buff_start; - out_buffer.size = sd.out_buf_size; - out_buffer.pos = 0; - - auto res = duckdb_zstd::ZSTD_decompressStream(zstd_stream_ptr, &out_buffer, &in_buffer); - if (duckdb_zstd::ZSTD_isError(res)) { - throw IOException(duckdb_zstd::ZSTD_getErrorName(res)); - } - - sd.in_buff_start = (data_ptr_t)in_buffer.src + in_buffer.pos; - sd.in_buff_end = (data_ptr_t)in_buffer.src + in_buffer.size; - sd.out_buff_end = (data_ptr_t)out_buffer.dst + out_buffer.pos; - return false; -} - -void ZstdStreamWrapper::Write(CompressedFile &file, StreamData &sd, data_ptr_t uncompressed_data, - int64_t uncompressed_size) { - D_ASSERT(writing); - - auto remaining = uncompressed_size; - while (remaining > 0) { - D_ASSERT(sd.out_buff.get() + sd.out_buf_size > sd.out_buff_start); - idx_t output_remaining = (sd.out_buff.get() + sd.out_buf_size) - sd.out_buff_start; - - duckdb_zstd::ZSTD_inBuffer in_buffer; - duckdb_zstd::ZSTD_outBuffer out_buffer; - - in_buffer.src = uncompressed_data; - in_buffer.size = remaining; - in_buffer.pos = 0; - - out_buffer.dst = sd.out_buff_start; - out_buffer.size = output_remaining; - out_buffer.pos = 0; - auto res = - duckdb_zstd::ZSTD_compressStream2(zstd_compress_ptr, &out_buffer, &in_buffer, duckdb_zstd::ZSTD_e_continue); - if (duckdb_zstd::ZSTD_isError(res)) { - throw IOException(duckdb_zstd::ZSTD_getErrorName(res)); - } - idx_t input_consumed = in_buffer.pos; - idx_t written_to_output = out_buffer.pos; - sd.out_buff_start += written_to_output; - if (sd.out_buff_start == sd.out_buff.get() + sd.out_buf_size) { - // no more output buffer available: flush - file.child_handle->Write(sd.out_buff.get(), sd.out_buff_start - sd.out_buff.get()); - sd.out_buff_start = sd.out_buff.get(); - } - uncompressed_data += input_consumed; - remaining -= input_consumed; - } -} - -void ZstdStreamWrapper::FlushStream() { - auto &sd = file->stream_data; - duckdb_zstd::ZSTD_inBuffer in_buffer; - duckdb_zstd::ZSTD_outBuffer out_buffer; - - in_buffer.src = nullptr; - in_buffer.size = 0; - in_buffer.pos = 0; - while (true) { - idx_t output_remaining = (sd.out_buff.get() + sd.out_buf_size) - sd.out_buff_start; - - out_buffer.dst = sd.out_buff_start; - out_buffer.size = output_remaining; - out_buffer.pos = 0; - - auto res = - duckdb_zstd::ZSTD_compressStream2(zstd_compress_ptr, &out_buffer, &in_buffer, duckdb_zstd::ZSTD_e_end); - if (duckdb_zstd::ZSTD_isError(res)) { - throw IOException(duckdb_zstd::ZSTD_getErrorName(res)); - } - idx_t written_to_output = out_buffer.pos; - sd.out_buff_start += written_to_output; - if (sd.out_buff_start > sd.out_buff.get()) { - file->child_handle->Write(sd.out_buff.get(), sd.out_buff_start - sd.out_buff.get()); - sd.out_buff_start = sd.out_buff.get(); - } - if (res == 0) { - break; - } - } -} - -void ZstdStreamWrapper::Close() { - if (!zstd_stream_ptr && !zstd_compress_ptr) { - return; - } - if (writing) { - FlushStream(); - } - if (zstd_stream_ptr) { - duckdb_zstd::ZSTD_freeDStream(zstd_stream_ptr); - } - if (zstd_compress_ptr) { - duckdb_zstd::ZSTD_freeCStream(zstd_compress_ptr); - } - zstd_stream_ptr = nullptr; - zstd_compress_ptr = nullptr; -} - -class ZStdFile : public CompressedFile { -public: - ZStdFile(unique_ptr child_handle_p, const string &path, bool write) - : CompressedFile(zstd_fs, move(child_handle_p), path) { - Initialize(write); - } - - ZStdFileSystem zstd_fs; -}; - -unique_ptr ZStdFileSystem::OpenCompressedFile(unique_ptr handle, bool write) { - auto path = handle->path; - return make_unique(move(handle), path, write); -} - -unique_ptr ZStdFileSystem::CreateStream() { - return make_unique(); -} - -idx_t ZStdFileSystem::InBufferSize() { - return duckdb_zstd::ZSTD_DStreamInSize(); -} - -idx_t ZStdFileSystem::OutBufferSize() { - return duckdb_zstd::ZSTD_DStreamOutSize(); -} - -} // namespace duckdb - - -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #2 -// See the end of this file for a list - -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -#include -#include - - -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #2 -// See the end of this file for a list - -#ifndef THRIFT_EXPORT_H -#define THRIFT_EXPORT_H - -#ifdef THRIFT_STATIC_DEFINE -# define THRIFT_EXPORT -#elif defined(_MSC_VER ) -# ifndef THRIFT_EXPORT -# ifdef thrift_EXPORTS - /* We are building this library */ -# define THRIFT_EXPORT __declspec(dllexport) -# else - /* We are using this library */ -# define THRIFT_EXPORT __declspec(dllimport) -# endif -# endif -#else -# define THRIFT_EXPORT -#endif - -#endif /* THRIFT_EXPORT_H */ - - -// LICENSE_CHANGE_END - - - - -using std::string; - -namespace duckdb_apache { -namespace thrift { -namespace transport { - - - - -void TMemoryBuffer::computeRead(uint32_t len, uint8_t** out_start, uint32_t* out_give) { - // Correct rBound_ so we can use the fast path in the future. - rBound_ = wBase_; - - // Decide how much to give. - uint32_t give = (std::min)(len, available_read()); - - *out_start = rBase_; - *out_give = give; - - // Preincrement rBase_ so the caller doesn't have to. - rBase_ += give; -} - -uint32_t TMemoryBuffer::readSlow(uint8_t* buf, uint32_t len) { - uint8_t* start; - uint32_t give; - computeRead(len, &start, &give); - - // Copy into the provided buffer. - memcpy(buf, start, give); - - return give; -} - -uint32_t TMemoryBuffer::readAppendToString(std::string& str, uint32_t len) { - // Don't get some stupid assertion failure. - if (buffer_ == nullptr) { - return 0; - } - - uint8_t* start; - uint32_t give; - computeRead(len, &start, &give); - - // Append to the provided string. - str.append((char*)start, give); - - return give; -} - -void TMemoryBuffer::ensureCanWrite(uint32_t len) { - // Check available space - uint32_t avail = available_write(); - if (len <= avail) { - return; - } - - if (!owner_) { - throw TTransportException("Insufficient space in external MemoryBuffer"); - } - - // Grow the buffer as necessary. - uint64_t new_size = bufferSize_; - while (len > avail) { - new_size = new_size > 0 ? new_size * 2 : 1; - if (new_size > maxBufferSize_) { - throw TTransportException(TTransportException::BAD_ARGS, - "Internal buffer size overflow"); - } - avail = available_write() + (static_cast(new_size) - bufferSize_); - } - - // Allocate into a new pointer so we don't bork ours if it fails. - auto* new_buffer = static_cast(std::realloc(buffer_, new_size)); - if (new_buffer == nullptr) { - throw std::bad_alloc(); - } - - rBase_ = new_buffer + (rBase_ - buffer_); - rBound_ = new_buffer + (rBound_ - buffer_); - wBase_ = new_buffer + (wBase_ - buffer_); - wBound_ = new_buffer + new_size; - buffer_ = new_buffer; - bufferSize_ = static_cast(new_size); -} - -void TMemoryBuffer::writeSlow(const uint8_t* buf, uint32_t len) { - ensureCanWrite(len); - - // Copy into the buffer and increment wBase_. - memcpy(wBase_, buf, len); - wBase_ += len; -} - -void TMemoryBuffer::wroteBytes(uint32_t len) { - uint32_t avail = available_write(); - if (len > avail) { - throw TTransportException("Client wrote more bytes than size of buffer."); - } - wBase_ += len; -} - -const uint8_t* TMemoryBuffer::borrowSlow(uint8_t* buf, uint32_t* len) { - (void)buf; - rBound_ = wBase_; - if (available_read() >= *len) { - *len = available_read(); - return rBase_; - } - return nullptr; -} -} -} -} // duckdb_apache::thrift::transport - - -// LICENSE_CHANGE_END - - -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #2 -// See the end of this file for a list - -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - - -#include - - - -using std::string; - -namespace duckdb_apache { -namespace thrift { -namespace transport { - -const char* TTransportException::what() const noexcept { - if (message_.empty()) { - switch (type_) { - case UNKNOWN: - return "TTransportException: Unknown transport exception"; - case NOT_OPEN: - return "TTransportException: Transport not open"; - case TIMED_OUT: - return "TTransportException: Timed out"; - case END_OF_FILE: - return "TTransportException: End of file"; - case INTERRUPTED: - return "TTransportException: Interrupted"; - case BAD_ARGS: - return "TTransportException: Invalid arguments"; - case CORRUPTED_DATA: - return "TTransportException: Corrupted Data"; - case INTERNAL_ERROR: - return "TTransportException: Internal error"; - default: - return "TTransportException: (Invalid exception type)"; - } - } else { - return message_.c_str(); - } -} -} -} -} // duckdb_apache::thrift::transport - - -// LICENSE_CHANGE_END - - -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #1 -// See the end of this file for a list - -/** - * Autogenerated by Thrift Compiler (0.11.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ - - -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #1 -// See the end of this file for a list - -/** - * Autogenerated by Thrift Compiler (0.11.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -#ifndef parquet_CONSTANTS_H -#define parquet_CONSTANTS_H - - - -namespace duckdb_parquet { namespace format { - -class parquetConstants { - public: - parquetConstants(); - -}; - -extern const parquetConstants g_parquet_constants; - -}} // namespace - -#endif - - -// LICENSE_CHANGE_END - - -namespace duckdb_parquet { namespace format { - -const parquetConstants g_parquet_constants; - -parquetConstants::parquetConstants() { -} - -}} // namespace - - - -// LICENSE_CHANGE_END - - -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #1 -// See the end of this file for a list - -/** - * Autogenerated by Thrift Compiler (0.11.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ - - -#include -#include - - - -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #2 -// See the end of this file for a list - -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -#ifndef _DUCKDB_THRIFT_TOSTRING_H_ -#define _DUCKDB_THRIFT_TOSTRING_H_ 1 - -#include -#include -#include -#include -#include -#include -#include - -namespace duckdb_apache { -namespace thrift { - -template -std::string to_string(const T& t) { - std::ostringstream o; - o << t; - return o.str(); -} - -// TODO: replace the computations below with std::numeric_limits::max_digits10 once C++11 -// is enabled. -inline std::string to_string(const float& t) { - std::ostringstream o; - o.precision(static_cast(std::ceil(static_cast(std::numeric_limits::digits * std::log10(2.0f) + 1)))); - o << t; - return o.str(); -} - -inline std::string to_string(const double& t) { - std::ostringstream o; - o.precision(static_cast(std::ceil(static_cast(std::numeric_limits::digits * std::log10(2.0f) + 1)))); - o << t; - return o.str(); -} - -inline std::string to_string(const long double& t) { - std::ostringstream o; - o.precision(static_cast(std::ceil(static_cast(std::numeric_limits::digits * std::log10(2.0f) + 1)))); - o << t; - return o.str(); -} - -template -std::string to_string(const std::map& m); - -template -std::string to_string(const std::set& s); - -template -std::string to_string(const std::vector& t); - -template -std::string to_string(const typename std::pair& v) { - std::ostringstream o; - o << to_string(v.first) << ": " << to_string(v.second); - return o.str(); -} - -template -std::string to_string(const T& beg, const T& end) { - std::ostringstream o; - for (T it = beg; it != end; ++it) { - if (it != beg) - o << ", "; - o << to_string(*it); - } - return o.str(); -} - -template -std::string to_string(const std::vector& t) { - std::ostringstream o; - o << "[" << to_string(t.begin(), t.end()) << "]"; - return o.str(); -} - -template -std::string to_string(const std::map& m) { - std::ostringstream o; - o << "{" << to_string(m.begin(), m.end()) << "}"; - return o.str(); -} - -template -std::string to_string(const std::set& s) { - std::ostringstream o; - o << "{" << to_string(s.begin(), s.end()) << "}"; - return o.str(); -} -} -} // duckdb_apache::thrift - -#endif // _DUCKDB_THRIFT_TOSTRING_H_ - - -// LICENSE_CHANGE_END - - -namespace duckdb_parquet { namespace format { - -int _kTypeValues[] = { - Type::BOOLEAN, - Type::INT32, - Type::INT64, - Type::INT96, - Type::FLOAT, - Type::DOUBLE, - Type::BYTE_ARRAY, - Type::FIXED_LEN_BYTE_ARRAY -}; -const char* _kTypeNames[] = { - "BOOLEAN", - "INT32", - "INT64", - "INT96", - "FLOAT", - "DOUBLE", - "BYTE_ARRAY", - "FIXED_LEN_BYTE_ARRAY" -}; -const std::map _Type_VALUES_TO_NAMES(::duckdb_apache::thrift::TEnumIterator(8, _kTypeValues, _kTypeNames), ::duckdb_apache::thrift::TEnumIterator(-1, NULL, NULL)); - -std::ostream& operator<<(std::ostream& out, const Type::type& val) { - std::map::const_iterator it = _Type_VALUES_TO_NAMES.find(val); - if (it != _Type_VALUES_TO_NAMES.end()) { - out << it->second; - } else { - out << static_cast(val); - } - return out; -} - -int _kConvertedTypeValues[] = { - ConvertedType::UTF8, - ConvertedType::MAP, - ConvertedType::MAP_KEY_VALUE, - ConvertedType::LIST, - ConvertedType::ENUM, - ConvertedType::DECIMAL, - ConvertedType::DATE, - ConvertedType::TIME_MILLIS, - ConvertedType::TIME_MICROS, - ConvertedType::TIMESTAMP_MILLIS, - ConvertedType::TIMESTAMP_MICROS, - ConvertedType::UINT_8, - ConvertedType::UINT_16, - ConvertedType::UINT_32, - ConvertedType::UINT_64, - ConvertedType::INT_8, - ConvertedType::INT_16, - ConvertedType::INT_32, - ConvertedType::INT_64, - ConvertedType::JSON, - ConvertedType::BSON, - ConvertedType::INTERVAL -}; -const char* _kConvertedTypeNames[] = { - "UTF8", - "MAP", - "MAP_KEY_VALUE", - "LIST", - "ENUM", - "DECIMAL", - "DATE", - "TIME_MILLIS", - "TIME_MICROS", - "TIMESTAMP_MILLIS", - "TIMESTAMP_MICROS", - "UINT_8", - "UINT_16", - "UINT_32", - "UINT_64", - "INT_8", - "INT_16", - "INT_32", - "INT_64", - "JSON", - "BSON", - "INTERVAL" -}; -const std::map _ConvertedType_VALUES_TO_NAMES(::duckdb_apache::thrift::TEnumIterator(22, _kConvertedTypeValues, _kConvertedTypeNames), ::duckdb_apache::thrift::TEnumIterator(-1, NULL, NULL)); - -std::ostream& operator<<(std::ostream& out, const ConvertedType::type& val) { - std::map::const_iterator it = _ConvertedType_VALUES_TO_NAMES.find(val); - if (it != _ConvertedType_VALUES_TO_NAMES.end()) { - out << it->second; - } else { - out << static_cast(val); - } - return out; -} - -int _kFieldRepetitionTypeValues[] = { - FieldRepetitionType::REQUIRED, - FieldRepetitionType::OPTIONAL, - FieldRepetitionType::REPEATED -}; -const char* _kFieldRepetitionTypeNames[] = { - "REQUIRED", - "OPTIONAL", - "REPEATED" -}; -const std::map _FieldRepetitionType_VALUES_TO_NAMES(::duckdb_apache::thrift::TEnumIterator(3, _kFieldRepetitionTypeValues, _kFieldRepetitionTypeNames), ::duckdb_apache::thrift::TEnumIterator(-1, NULL, NULL)); - -std::ostream& operator<<(std::ostream& out, const FieldRepetitionType::type& val) { - std::map::const_iterator it = _FieldRepetitionType_VALUES_TO_NAMES.find(val); - if (it != _FieldRepetitionType_VALUES_TO_NAMES.end()) { - out << it->second; - } else { - out << static_cast(val); - } - return out; -} - -int _kEncodingValues[] = { - Encoding::PLAIN, - Encoding::PLAIN_DICTIONARY, - Encoding::RLE, - Encoding::BIT_PACKED, - Encoding::DELTA_BINARY_PACKED, - Encoding::DELTA_LENGTH_BYTE_ARRAY, - Encoding::DELTA_BYTE_ARRAY, - Encoding::RLE_DICTIONARY -}; -const char* _kEncodingNames[] = { - "PLAIN", - "PLAIN_DICTIONARY", - "RLE", - "BIT_PACKED", - "DELTA_BINARY_PACKED", - "DELTA_LENGTH_BYTE_ARRAY", - "DELTA_BYTE_ARRAY", - "RLE_DICTIONARY" -}; -const std::map _Encoding_VALUES_TO_NAMES(::duckdb_apache::thrift::TEnumIterator(8, _kEncodingValues, _kEncodingNames), ::duckdb_apache::thrift::TEnumIterator(-1, NULL, NULL)); - -std::ostream& operator<<(std::ostream& out, const Encoding::type& val) { - std::map::const_iterator it = _Encoding_VALUES_TO_NAMES.find(val); - if (it != _Encoding_VALUES_TO_NAMES.end()) { - out << it->second; - } else { - out << static_cast(val); - } - return out; -} - -int _kCompressionCodecValues[] = { - CompressionCodec::UNCOMPRESSED, - CompressionCodec::SNAPPY, - CompressionCodec::GZIP, - CompressionCodec::LZO, - CompressionCodec::BROTLI, - CompressionCodec::LZ4, - CompressionCodec::ZSTD -}; -const char* _kCompressionCodecNames[] = { - "UNCOMPRESSED", - "SNAPPY", - "GZIP", - "LZO", - "BROTLI", - "LZ4", - "ZSTD" -}; -const std::map _CompressionCodec_VALUES_TO_NAMES(::duckdb_apache::thrift::TEnumIterator(7, _kCompressionCodecValues, _kCompressionCodecNames), ::duckdb_apache::thrift::TEnumIterator(-1, NULL, NULL)); - -std::ostream& operator<<(std::ostream& out, const CompressionCodec::type& val) { - std::map::const_iterator it = _CompressionCodec_VALUES_TO_NAMES.find(val); - if (it != _CompressionCodec_VALUES_TO_NAMES.end()) { - out << it->second; - } else { - out << static_cast(val); - } - return out; -} - -int _kPageTypeValues[] = { - PageType::DATA_PAGE, - PageType::INDEX_PAGE, - PageType::DICTIONARY_PAGE, - PageType::DATA_PAGE_V2 -}; -const char* _kPageTypeNames[] = { - "DATA_PAGE", - "INDEX_PAGE", - "DICTIONARY_PAGE", - "DATA_PAGE_V2" -}; -const std::map _PageType_VALUES_TO_NAMES(::duckdb_apache::thrift::TEnumIterator(4, _kPageTypeValues, _kPageTypeNames), ::duckdb_apache::thrift::TEnumIterator(-1, NULL, NULL)); - -std::ostream& operator<<(std::ostream& out, const PageType::type& val) { - std::map::const_iterator it = _PageType_VALUES_TO_NAMES.find(val); - if (it != _PageType_VALUES_TO_NAMES.end()) { - out << it->second; - } else { - out << static_cast(val); - } - return out; -} - -int _kBoundaryOrderValues[] = { - BoundaryOrder::UNORDERED, - BoundaryOrder::ASCENDING, - BoundaryOrder::DESCENDING -}; -const char* _kBoundaryOrderNames[] = { - "UNORDERED", - "ASCENDING", - "DESCENDING" -}; -const std::map _BoundaryOrder_VALUES_TO_NAMES(::duckdb_apache::thrift::TEnumIterator(3, _kBoundaryOrderValues, _kBoundaryOrderNames), ::duckdb_apache::thrift::TEnumIterator(-1, NULL, NULL)); - -std::ostream& operator<<(std::ostream& out, const BoundaryOrder::type& val) { - std::map::const_iterator it = _BoundaryOrder_VALUES_TO_NAMES.find(val); - if (it != _BoundaryOrder_VALUES_TO_NAMES.end()) { - out << it->second; - } else { - out << static_cast(val); - } - return out; -} - - -Statistics::~Statistics() throw() { -} - - -void Statistics::__set_max(const std::string& val) { - this->max = val; -__isset.max = true; -} - -void Statistics::__set_min(const std::string& val) { - this->min = val; -__isset.min = true; -} - -void Statistics::__set_null_count(const int64_t val) { - this->null_count = val; -__isset.null_count = true; -} - -void Statistics::__set_distinct_count(const int64_t val) { - this->distinct_count = val; -__isset.distinct_count = true; -} - -void Statistics::__set_max_value(const std::string& val) { - this->max_value = val; -__isset.max_value = true; -} - -void Statistics::__set_min_value(const std::string& val) { - this->min_value = val; -__isset.min_value = true; -} -std::ostream& operator<<(std::ostream& out, const Statistics& obj) -{ - obj.printTo(out); - return out; -} - - -uint32_t Statistics::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { - - ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::duckdb_apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::duckdb_apache::thrift::protocol::TProtocolException; - - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRING) { - xfer += iprot->readBinary(this->max); - this->__isset.max = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRING) { - xfer += iprot->readBinary(this->min); - this->__isset.min = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 3: - if (ftype == ::duckdb_apache::thrift::protocol::T_I64) { - xfer += iprot->readI64(this->null_count); - this->__isset.null_count = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 4: - if (ftype == ::duckdb_apache::thrift::protocol::T_I64) { - xfer += iprot->readI64(this->distinct_count); - this->__isset.distinct_count = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 5: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRING) { - xfer += iprot->readBinary(this->max_value); - this->__isset.max_value = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 6: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRING) { - xfer += iprot->readBinary(this->min_value); - this->__isset.min_value = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t Statistics::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("Statistics"); - - if (this->__isset.max) { - xfer += oprot->writeFieldBegin("max", ::duckdb_apache::thrift::protocol::T_STRING, 1); - xfer += oprot->writeBinary(this->max); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.min) { - xfer += oprot->writeFieldBegin("min", ::duckdb_apache::thrift::protocol::T_STRING, 2); - xfer += oprot->writeBinary(this->min); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.null_count) { - xfer += oprot->writeFieldBegin("null_count", ::duckdb_apache::thrift::protocol::T_I64, 3); - xfer += oprot->writeI64(this->null_count); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.distinct_count) { - xfer += oprot->writeFieldBegin("distinct_count", ::duckdb_apache::thrift::protocol::T_I64, 4); - xfer += oprot->writeI64(this->distinct_count); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.max_value) { - xfer += oprot->writeFieldBegin("max_value", ::duckdb_apache::thrift::protocol::T_STRING, 5); - xfer += oprot->writeBinary(this->max_value); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.min_value) { - xfer += oprot->writeFieldBegin("min_value", ::duckdb_apache::thrift::protocol::T_STRING, 6); - xfer += oprot->writeBinary(this->min_value); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - -void swap(Statistics &a, Statistics &b) { - using ::std::swap; - swap(a.max, b.max); - swap(a.min, b.min); - swap(a.null_count, b.null_count); - swap(a.distinct_count, b.distinct_count); - swap(a.max_value, b.max_value); - swap(a.min_value, b.min_value); - swap(a.__isset, b.__isset); -} - -Statistics::Statistics(const Statistics& other0) { - max = other0.max; - min = other0.min; - null_count = other0.null_count; - distinct_count = other0.distinct_count; - max_value = other0.max_value; - min_value = other0.min_value; - __isset = other0.__isset; -} -Statistics& Statistics::operator=(const Statistics& other1) { - max = other1.max; - min = other1.min; - null_count = other1.null_count; - distinct_count = other1.distinct_count; - max_value = other1.max_value; - min_value = other1.min_value; - __isset = other1.__isset; - return *this; -} -void Statistics::printTo(std::ostream& out) const { - using ::duckdb_apache::thrift::to_string; - out << "Statistics("; - out << "max="; (__isset.max ? (out << to_string(max)) : (out << "")); - out << ", " << "min="; (__isset.min ? (out << to_string(min)) : (out << "")); - out << ", " << "null_count="; (__isset.null_count ? (out << to_string(null_count)) : (out << "")); - out << ", " << "distinct_count="; (__isset.distinct_count ? (out << to_string(distinct_count)) : (out << "")); - out << ", " << "max_value="; (__isset.max_value ? (out << to_string(max_value)) : (out << "")); - out << ", " << "min_value="; (__isset.min_value ? (out << to_string(min_value)) : (out << "")); - out << ")"; -} - - -StringType::~StringType() throw() { -} - -std::ostream& operator<<(std::ostream& out, const StringType& obj) -{ - obj.printTo(out); - return out; -} - - -uint32_t StringType::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { - - ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::duckdb_apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::duckdb_apache::thrift::protocol::TProtocolException; - - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { - break; - } - xfer += iprot->skip(ftype); - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t StringType::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("StringType"); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - -void swap(StringType &a, StringType &b) { - using ::std::swap; - (void) a; - (void) b; -} - -StringType::StringType(const StringType& other2) { - (void) other2; -} -StringType& StringType::operator=(const StringType& other3) { - (void) other3; - return *this; -} -void StringType::printTo(std::ostream& out) const { - using ::duckdb_apache::thrift::to_string; - out << "StringType("; - out << ")"; -} - - -UUIDType::~UUIDType() throw() { -} - -std::ostream& operator<<(std::ostream& out, const UUIDType& obj) -{ - obj.printTo(out); - return out; -} - - -uint32_t UUIDType::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { - - ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::duckdb_apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::duckdb_apache::thrift::protocol::TProtocolException; - - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { - break; - } - xfer += iprot->skip(ftype); - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t UUIDType::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("UUIDType"); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - -void swap(UUIDType &a, UUIDType &b) { - using ::std::swap; - (void) a; - (void) b; -} - -UUIDType::UUIDType(const UUIDType& other4) { - (void) other4; -} -UUIDType& UUIDType::operator=(const UUIDType& other5) { - (void) other5; - return *this; -} -void UUIDType::printTo(std::ostream& out) const { - using ::duckdb_apache::thrift::to_string; - out << "UUIDType("; - out << ")"; -} - - -MapType::~MapType() throw() { -} - -std::ostream& operator<<(std::ostream& out, const MapType& obj) -{ - obj.printTo(out); - return out; -} - - -uint32_t MapType::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { - - ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::duckdb_apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::duckdb_apache::thrift::protocol::TProtocolException; - - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { - break; - } - xfer += iprot->skip(ftype); - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t MapType::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("MapType"); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - -void swap(MapType &a, MapType &b) { - using ::std::swap; - (void) a; - (void) b; -} - -MapType::MapType(const MapType& other6) { - (void) other6; -} -MapType& MapType::operator=(const MapType& other7) { - (void) other7; - return *this; -} -void MapType::printTo(std::ostream& out) const { - using ::duckdb_apache::thrift::to_string; - out << "MapType("; - out << ")"; -} - - -ListType::~ListType() throw() { -} - -std::ostream& operator<<(std::ostream& out, const ListType& obj) -{ - obj.printTo(out); - return out; -} - - -uint32_t ListType::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { - - ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::duckdb_apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::duckdb_apache::thrift::protocol::TProtocolException; - - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { - break; - } - xfer += iprot->skip(ftype); - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t ListType::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("ListType"); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - -void swap(ListType &a, ListType &b) { - using ::std::swap; - (void) a; - (void) b; -} - -ListType::ListType(const ListType& other8) { - (void) other8; -} -ListType& ListType::operator=(const ListType& other9) { - (void) other9; - return *this; -} -void ListType::printTo(std::ostream& out) const { - using ::duckdb_apache::thrift::to_string; - out << "ListType("; - out << ")"; -} - - -EnumType::~EnumType() throw() { -} - -std::ostream& operator<<(std::ostream& out, const EnumType& obj) -{ - obj.printTo(out); - return out; -} - - -uint32_t EnumType::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { - - ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::duckdb_apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::duckdb_apache::thrift::protocol::TProtocolException; - - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { - break; - } - xfer += iprot->skip(ftype); - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t EnumType::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("EnumType"); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - -void swap(EnumType &a, EnumType &b) { - using ::std::swap; - (void) a; - (void) b; -} - -EnumType::EnumType(const EnumType& other10) { - (void) other10; -} -EnumType& EnumType::operator=(const EnumType& other11) { - (void) other11; - return *this; -} -void EnumType::printTo(std::ostream& out) const { - using ::duckdb_apache::thrift::to_string; - out << "EnumType("; - out << ")"; -} - - -DateType::~DateType() throw() { -} - -std::ostream& operator<<(std::ostream& out, const DateType& obj) -{ - obj.printTo(out); - return out; -} - - -uint32_t DateType::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { - - ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::duckdb_apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::duckdb_apache::thrift::protocol::TProtocolException; - - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { - break; - } - xfer += iprot->skip(ftype); - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t DateType::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("DateType"); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - -void swap(DateType &a, DateType &b) { - using ::std::swap; - (void) a; - (void) b; -} - -DateType::DateType(const DateType& other12) { - (void) other12; -} -DateType& DateType::operator=(const DateType& other13) { - (void) other13; - return *this; -} -void DateType::printTo(std::ostream& out) const { - using ::duckdb_apache::thrift::to_string; - out << "DateType("; - out << ")"; -} - - -NullType::~NullType() throw() { -} - -std::ostream& operator<<(std::ostream& out, const NullType& obj) -{ - obj.printTo(out); - return out; -} - - -uint32_t NullType::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { - - ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::duckdb_apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::duckdb_apache::thrift::protocol::TProtocolException; - - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { - break; - } - xfer += iprot->skip(ftype); - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t NullType::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("NullType"); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - -void swap(NullType &a, NullType &b) { - using ::std::swap; - (void) a; - (void) b; -} - -NullType::NullType(const NullType& other14) { - (void) other14; -} -NullType& NullType::operator=(const NullType& other15) { - (void) other15; - return *this; -} -void NullType::printTo(std::ostream& out) const { - using ::duckdb_apache::thrift::to_string; - out << "NullType("; - out << ")"; -} - - -DecimalType::~DecimalType() throw() { -} - - -void DecimalType::__set_scale(const int32_t val) { - this->scale = val; -} - -void DecimalType::__set_precision(const int32_t val) { - this->precision = val; -} -std::ostream& operator<<(std::ostream& out, const DecimalType& obj) -{ - obj.printTo(out); - return out; -} - - -uint32_t DecimalType::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { - - ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::duckdb_apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::duckdb_apache::thrift::protocol::TProtocolException; - - bool isset_scale = false; - bool isset_precision = false; - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->scale); - isset_scale = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->precision); - isset_precision = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - if (!isset_scale) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_precision) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; -} - -uint32_t DecimalType::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("DecimalType"); - - xfer += oprot->writeFieldBegin("scale", ::duckdb_apache::thrift::protocol::T_I32, 1); - xfer += oprot->writeI32(this->scale); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("precision", ::duckdb_apache::thrift::protocol::T_I32, 2); - xfer += oprot->writeI32(this->precision); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - -void swap(DecimalType &a, DecimalType &b) { - using ::std::swap; - swap(a.scale, b.scale); - swap(a.precision, b.precision); -} - -DecimalType::DecimalType(const DecimalType& other16) { - scale = other16.scale; - precision = other16.precision; -} -DecimalType& DecimalType::operator=(const DecimalType& other17) { - scale = other17.scale; - precision = other17.precision; - return *this; -} -void DecimalType::printTo(std::ostream& out) const { - using ::duckdb_apache::thrift::to_string; - out << "DecimalType("; - out << "scale=" << to_string(scale); - out << ", " << "precision=" << to_string(precision); - out << ")"; -} - - -MilliSeconds::~MilliSeconds() throw() { -} - -std::ostream& operator<<(std::ostream& out, const MilliSeconds& obj) -{ - obj.printTo(out); - return out; -} - - -uint32_t MilliSeconds::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { - - ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::duckdb_apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::duckdb_apache::thrift::protocol::TProtocolException; - - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { - break; - } - xfer += iprot->skip(ftype); - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t MilliSeconds::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("MilliSeconds"); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - -void swap(MilliSeconds &a, MilliSeconds &b) { - using ::std::swap; - (void) a; - (void) b; -} - -MilliSeconds::MilliSeconds(const MilliSeconds& other18) { - (void) other18; -} -MilliSeconds& MilliSeconds::operator=(const MilliSeconds& other19) { - (void) other19; - return *this; -} -void MilliSeconds::printTo(std::ostream& out) const { - using ::duckdb_apache::thrift::to_string; - out << "MilliSeconds("; - out << ")"; -} - - -MicroSeconds::~MicroSeconds() throw() { -} - -std::ostream& operator<<(std::ostream& out, const MicroSeconds& obj) -{ - obj.printTo(out); - return out; -} - - -uint32_t MicroSeconds::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { - - ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::duckdb_apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::duckdb_apache::thrift::protocol::TProtocolException; - - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { - break; - } - xfer += iprot->skip(ftype); - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t MicroSeconds::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("MicroSeconds"); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - -void swap(MicroSeconds &a, MicroSeconds &b) { - using ::std::swap; - (void) a; - (void) b; -} - -MicroSeconds::MicroSeconds(const MicroSeconds& other20) { - (void) other20; -} -MicroSeconds& MicroSeconds::operator=(const MicroSeconds& other21) { - (void) other21; - return *this; -} -void MicroSeconds::printTo(std::ostream& out) const { - using ::duckdb_apache::thrift::to_string; - out << "MicroSeconds("; - out << ")"; -} - - -NanoSeconds::~NanoSeconds() throw() { -} - -std::ostream& operator<<(std::ostream& out, const NanoSeconds& obj) -{ - obj.printTo(out); - return out; -} - - -uint32_t NanoSeconds::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { - - ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::duckdb_apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::duckdb_apache::thrift::protocol::TProtocolException; - - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { - break; - } - xfer += iprot->skip(ftype); - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t NanoSeconds::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("NanoSeconds"); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - -void swap(NanoSeconds &a, NanoSeconds &b) { - using ::std::swap; - (void) a; - (void) b; -} - -NanoSeconds::NanoSeconds(const NanoSeconds& other22) { - (void) other22; -} -NanoSeconds& NanoSeconds::operator=(const NanoSeconds& other23) { - (void) other23; - return *this; -} -void NanoSeconds::printTo(std::ostream& out) const { - using ::duckdb_apache::thrift::to_string; - out << "NanoSeconds("; - out << ")"; -} - - -TimeUnit::~TimeUnit() throw() { -} - - -void TimeUnit::__set_MILLIS(const MilliSeconds& val) { - this->MILLIS = val; -__isset.MILLIS = true; -} - -void TimeUnit::__set_MICROS(const MicroSeconds& val) { - this->MICROS = val; -__isset.MICROS = true; -} - -void TimeUnit::__set_NANOS(const NanoSeconds& val) { - this->NANOS = val; -__isset.NANOS = true; -} -std::ostream& operator<<(std::ostream& out, const TimeUnit& obj) -{ - obj.printTo(out); - return out; -} - - -uint32_t TimeUnit::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { - - ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::duckdb_apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::duckdb_apache::thrift::protocol::TProtocolException; - - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRUCT) { - xfer += this->MILLIS.read(iprot); - this->__isset.MILLIS = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRUCT) { - xfer += this->MICROS.read(iprot); - this->__isset.MICROS = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 3: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRUCT) { - xfer += this->NANOS.read(iprot); - this->__isset.NANOS = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t TimeUnit::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("TimeUnit"); - - if (this->__isset.MILLIS) { - xfer += oprot->writeFieldBegin("MILLIS", ::duckdb_apache::thrift::protocol::T_STRUCT, 1); - xfer += this->MILLIS.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.MICROS) { - xfer += oprot->writeFieldBegin("MICROS", ::duckdb_apache::thrift::protocol::T_STRUCT, 2); - xfer += this->MICROS.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.NANOS) { - xfer += oprot->writeFieldBegin("NANOS", ::duckdb_apache::thrift::protocol::T_STRUCT, 3); - xfer += this->NANOS.write(oprot); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - -void swap(TimeUnit &a, TimeUnit &b) { - using ::std::swap; - swap(a.MILLIS, b.MILLIS); - swap(a.MICROS, b.MICROS); - swap(a.NANOS, b.NANOS); - swap(a.__isset, b.__isset); -} - -TimeUnit::TimeUnit(const TimeUnit& other24) { - MILLIS = other24.MILLIS; - MICROS = other24.MICROS; - NANOS = other24.NANOS; - __isset = other24.__isset; -} -TimeUnit& TimeUnit::operator=(const TimeUnit& other25) { - MILLIS = other25.MILLIS; - MICROS = other25.MICROS; - NANOS = other25.NANOS; - __isset = other25.__isset; - return *this; -} -void TimeUnit::printTo(std::ostream& out) const { - using ::duckdb_apache::thrift::to_string; - out << "TimeUnit("; - out << "MILLIS="; (__isset.MILLIS ? (out << to_string(MILLIS)) : (out << "")); - out << ", " << "MICROS="; (__isset.MICROS ? (out << to_string(MICROS)) : (out << "")); - out << ", " << "NANOS="; (__isset.NANOS ? (out << to_string(NANOS)) : (out << "")); - out << ")"; -} - - -TimestampType::~TimestampType() throw() { -} - - -void TimestampType::__set_isAdjustedToUTC(const bool val) { - this->isAdjustedToUTC = val; -} - -void TimestampType::__set_unit(const TimeUnit& val) { - this->unit = val; -} -std::ostream& operator<<(std::ostream& out, const TimestampType& obj) -{ - obj.printTo(out); - return out; -} - - -uint32_t TimestampType::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { - - ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::duckdb_apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::duckdb_apache::thrift::protocol::TProtocolException; - - bool isset_isAdjustedToUTC = false; - bool isset_unit = false; - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::duckdb_apache::thrift::protocol::T_BOOL) { - xfer += iprot->readBool(this->isAdjustedToUTC); - isset_isAdjustedToUTC = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRUCT) { - xfer += this->unit.read(iprot); - isset_unit = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - if (!isset_isAdjustedToUTC) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_unit) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; -} - -uint32_t TimestampType::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("TimestampType"); - - xfer += oprot->writeFieldBegin("isAdjustedToUTC", ::duckdb_apache::thrift::protocol::T_BOOL, 1); - xfer += oprot->writeBool(this->isAdjustedToUTC); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("unit", ::duckdb_apache::thrift::protocol::T_STRUCT, 2); - xfer += this->unit.write(oprot); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - -void swap(TimestampType &a, TimestampType &b) { - using ::std::swap; - swap(a.isAdjustedToUTC, b.isAdjustedToUTC); - swap(a.unit, b.unit); -} - -TimestampType::TimestampType(const TimestampType& other26) { - isAdjustedToUTC = other26.isAdjustedToUTC; - unit = other26.unit; -} -TimestampType& TimestampType::operator=(const TimestampType& other27) { - isAdjustedToUTC = other27.isAdjustedToUTC; - unit = other27.unit; - return *this; -} -void TimestampType::printTo(std::ostream& out) const { - using ::duckdb_apache::thrift::to_string; - out << "TimestampType("; - out << "isAdjustedToUTC=" << to_string(isAdjustedToUTC); - out << ", " << "unit=" << to_string(unit); - out << ")"; -} - - -TimeType::~TimeType() throw() { -} - - -void TimeType::__set_isAdjustedToUTC(const bool val) { - this->isAdjustedToUTC = val; -} - -void TimeType::__set_unit(const TimeUnit& val) { - this->unit = val; -} -std::ostream& operator<<(std::ostream& out, const TimeType& obj) -{ - obj.printTo(out); - return out; -} - - -uint32_t TimeType::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { - - ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::duckdb_apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::duckdb_apache::thrift::protocol::TProtocolException; - - bool isset_isAdjustedToUTC = false; - bool isset_unit = false; - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::duckdb_apache::thrift::protocol::T_BOOL) { - xfer += iprot->readBool(this->isAdjustedToUTC); - isset_isAdjustedToUTC = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRUCT) { - xfer += this->unit.read(iprot); - isset_unit = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - if (!isset_isAdjustedToUTC) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_unit) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; -} - -uint32_t TimeType::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("TimeType"); - - xfer += oprot->writeFieldBegin("isAdjustedToUTC", ::duckdb_apache::thrift::protocol::T_BOOL, 1); - xfer += oprot->writeBool(this->isAdjustedToUTC); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("unit", ::duckdb_apache::thrift::protocol::T_STRUCT, 2); - xfer += this->unit.write(oprot); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - -void swap(TimeType &a, TimeType &b) { - using ::std::swap; - swap(a.isAdjustedToUTC, b.isAdjustedToUTC); - swap(a.unit, b.unit); -} - -TimeType::TimeType(const TimeType& other28) { - isAdjustedToUTC = other28.isAdjustedToUTC; - unit = other28.unit; -} -TimeType& TimeType::operator=(const TimeType& other29) { - isAdjustedToUTC = other29.isAdjustedToUTC; - unit = other29.unit; - return *this; -} -void TimeType::printTo(std::ostream& out) const { - using ::duckdb_apache::thrift::to_string; - out << "TimeType("; - out << "isAdjustedToUTC=" << to_string(isAdjustedToUTC); - out << ", " << "unit=" << to_string(unit); - out << ")"; -} - - -IntType::~IntType() throw() { -} - - -void IntType::__set_bitWidth(const int8_t val) { - this->bitWidth = val; -} - -void IntType::__set_isSigned(const bool val) { - this->isSigned = val; -} -std::ostream& operator<<(std::ostream& out, const IntType& obj) -{ - obj.printTo(out); - return out; -} - - -uint32_t IntType::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { - - ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::duckdb_apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::duckdb_apache::thrift::protocol::TProtocolException; - - bool isset_bitWidth = false; - bool isset_isSigned = false; - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::duckdb_apache::thrift::protocol::T_BYTE) { - xfer += iprot->readByte(this->bitWidth); - isset_bitWidth = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::duckdb_apache::thrift::protocol::T_BOOL) { - xfer += iprot->readBool(this->isSigned); - isset_isSigned = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - if (!isset_bitWidth) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_isSigned) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; -} - -uint32_t IntType::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("IntType"); - - xfer += oprot->writeFieldBegin("bitWidth", ::duckdb_apache::thrift::protocol::T_BYTE, 1); - xfer += oprot->writeByte(this->bitWidth); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("isSigned", ::duckdb_apache::thrift::protocol::T_BOOL, 2); - xfer += oprot->writeBool(this->isSigned); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - -void swap(IntType &a, IntType &b) { - using ::std::swap; - swap(a.bitWidth, b.bitWidth); - swap(a.isSigned, b.isSigned); -} - -IntType::IntType(const IntType& other30) { - bitWidth = other30.bitWidth; - isSigned = other30.isSigned; -} -IntType& IntType::operator=(const IntType& other31) { - bitWidth = other31.bitWidth; - isSigned = other31.isSigned; - return *this; -} -void IntType::printTo(std::ostream& out) const { - using ::duckdb_apache::thrift::to_string; - out << "IntType("; - out << "bitWidth=" << to_string(bitWidth); - out << ", " << "isSigned=" << to_string(isSigned); - out << ")"; -} - - -JsonType::~JsonType() throw() { -} - -std::ostream& operator<<(std::ostream& out, const JsonType& obj) -{ - obj.printTo(out); - return out; -} - - -uint32_t JsonType::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { - - ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::duckdb_apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::duckdb_apache::thrift::protocol::TProtocolException; - - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { - break; - } - xfer += iprot->skip(ftype); - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t JsonType::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("JsonType"); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - -void swap(JsonType &a, JsonType &b) { - using ::std::swap; - (void) a; - (void) b; -} - -JsonType::JsonType(const JsonType& other32) { - (void) other32; -} -JsonType& JsonType::operator=(const JsonType& other33) { - (void) other33; - return *this; -} -void JsonType::printTo(std::ostream& out) const { - using ::duckdb_apache::thrift::to_string; - out << "JsonType("; - out << ")"; -} - - -BsonType::~BsonType() throw() { -} - -std::ostream& operator<<(std::ostream& out, const BsonType& obj) -{ - obj.printTo(out); - return out; -} - - -uint32_t BsonType::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { - - ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::duckdb_apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::duckdb_apache::thrift::protocol::TProtocolException; - - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { - break; - } - xfer += iprot->skip(ftype); - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t BsonType::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("BsonType"); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - -void swap(BsonType &a, BsonType &b) { - using ::std::swap; - (void) a; - (void) b; -} - -BsonType::BsonType(const BsonType& other34) { - (void) other34; -} -BsonType& BsonType::operator=(const BsonType& other35) { - (void) other35; - return *this; -} -void BsonType::printTo(std::ostream& out) const { - using ::duckdb_apache::thrift::to_string; - out << "BsonType("; - out << ")"; -} - - -LogicalType::~LogicalType() throw() { -} - - -void LogicalType::__set_STRING(const StringType& val) { - this->STRING = val; -__isset.STRING = true; -} - -void LogicalType::__set_MAP(const MapType& val) { - this->MAP = val; -__isset.MAP = true; -} - -void LogicalType::__set_LIST(const ListType& val) { - this->LIST = val; -__isset.LIST = true; -} - -void LogicalType::__set_ENUM(const EnumType& val) { - this->ENUM = val; -__isset.ENUM = true; -} - -void LogicalType::__set_DECIMAL(const DecimalType& val) { - this->DECIMAL = val; -__isset.DECIMAL = true; -} - -void LogicalType::__set_DATE(const DateType& val) { - this->DATE = val; -__isset.DATE = true; -} - -void LogicalType::__set_TIME(const TimeType& val) { - this->TIME = val; -__isset.TIME = true; -} - -void LogicalType::__set_TIMESTAMP(const TimestampType& val) { - this->TIMESTAMP = val; -__isset.TIMESTAMP = true; -} - -void LogicalType::__set_INTEGER(const IntType& val) { - this->INTEGER = val; -__isset.INTEGER = true; -} - -void LogicalType::__set_UNKNOWN(const NullType& val) { - this->UNKNOWN = val; -__isset.UNKNOWN = true; -} - -void LogicalType::__set_JSON(const JsonType& val) { - this->JSON = val; -__isset.JSON = true; -} - -void LogicalType::__set_BSON(const BsonType& val) { - this->BSON = val; -__isset.BSON = true; -} - -void LogicalType::__set_UUID(const UUIDType& val) { - this->UUID = val; -__isset.UUID = true; -} -std::ostream& operator<<(std::ostream& out, const LogicalType& obj) -{ - obj.printTo(out); - return out; -} - - -uint32_t LogicalType::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { - - ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::duckdb_apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::duckdb_apache::thrift::protocol::TProtocolException; - - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRUCT) { - xfer += this->STRING.read(iprot); - this->__isset.STRING = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRUCT) { - xfer += this->MAP.read(iprot); - this->__isset.MAP = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 3: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRUCT) { - xfer += this->LIST.read(iprot); - this->__isset.LIST = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 4: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRUCT) { - xfer += this->ENUM.read(iprot); - this->__isset.ENUM = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 5: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRUCT) { - xfer += this->DECIMAL.read(iprot); - this->__isset.DECIMAL = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 6: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRUCT) { - xfer += this->DATE.read(iprot); - this->__isset.DATE = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 7: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRUCT) { - xfer += this->TIME.read(iprot); - this->__isset.TIME = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 8: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRUCT) { - xfer += this->TIMESTAMP.read(iprot); - this->__isset.TIMESTAMP = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 10: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRUCT) { - xfer += this->INTEGER.read(iprot); - this->__isset.INTEGER = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 11: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRUCT) { - xfer += this->UNKNOWN.read(iprot); - this->__isset.UNKNOWN = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 12: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRUCT) { - xfer += this->JSON.read(iprot); - this->__isset.JSON = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 13: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRUCT) { - xfer += this->BSON.read(iprot); - this->__isset.BSON = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 14: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRUCT) { - xfer += this->UUID.read(iprot); - this->__isset.UUID = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t LogicalType::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("LogicalType"); - - if (this->__isset.STRING) { - xfer += oprot->writeFieldBegin("STRING", ::duckdb_apache::thrift::protocol::T_STRUCT, 1); - xfer += this->STRING.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.MAP) { - xfer += oprot->writeFieldBegin("MAP", ::duckdb_apache::thrift::protocol::T_STRUCT, 2); - xfer += this->MAP.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.LIST) { - xfer += oprot->writeFieldBegin("LIST", ::duckdb_apache::thrift::protocol::T_STRUCT, 3); - xfer += this->LIST.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.ENUM) { - xfer += oprot->writeFieldBegin("ENUM", ::duckdb_apache::thrift::protocol::T_STRUCT, 4); - xfer += this->ENUM.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.DECIMAL) { - xfer += oprot->writeFieldBegin("DECIMAL", ::duckdb_apache::thrift::protocol::T_STRUCT, 5); - xfer += this->DECIMAL.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.DATE) { - xfer += oprot->writeFieldBegin("DATE", ::duckdb_apache::thrift::protocol::T_STRUCT, 6); - xfer += this->DATE.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.TIME) { - xfer += oprot->writeFieldBegin("TIME", ::duckdb_apache::thrift::protocol::T_STRUCT, 7); - xfer += this->TIME.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.TIMESTAMP) { - xfer += oprot->writeFieldBegin("TIMESTAMP", ::duckdb_apache::thrift::protocol::T_STRUCT, 8); - xfer += this->TIMESTAMP.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.INTEGER) { - xfer += oprot->writeFieldBegin("INTEGER", ::duckdb_apache::thrift::protocol::T_STRUCT, 10); - xfer += this->INTEGER.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.UNKNOWN) { - xfer += oprot->writeFieldBegin("UNKNOWN", ::duckdb_apache::thrift::protocol::T_STRUCT, 11); - xfer += this->UNKNOWN.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.JSON) { - xfer += oprot->writeFieldBegin("JSON", ::duckdb_apache::thrift::protocol::T_STRUCT, 12); - xfer += this->JSON.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.BSON) { - xfer += oprot->writeFieldBegin("BSON", ::duckdb_apache::thrift::protocol::T_STRUCT, 13); - xfer += this->BSON.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.UUID) { - xfer += oprot->writeFieldBegin("UUID", ::duckdb_apache::thrift::protocol::T_STRUCT, 14); - xfer += this->UUID.write(oprot); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - -void swap(LogicalType &a, LogicalType &b) { - using ::std::swap; - swap(a.STRING, b.STRING); - swap(a.MAP, b.MAP); - swap(a.LIST, b.LIST); - swap(a.ENUM, b.ENUM); - swap(a.DECIMAL, b.DECIMAL); - swap(a.DATE, b.DATE); - swap(a.TIME, b.TIME); - swap(a.TIMESTAMP, b.TIMESTAMP); - swap(a.INTEGER, b.INTEGER); - swap(a.UNKNOWN, b.UNKNOWN); - swap(a.JSON, b.JSON); - swap(a.BSON, b.BSON); - swap(a.UUID, b.UUID); - swap(a.__isset, b.__isset); -} - -LogicalType::LogicalType(const LogicalType& other36) { - STRING = other36.STRING; - MAP = other36.MAP; - LIST = other36.LIST; - ENUM = other36.ENUM; - DECIMAL = other36.DECIMAL; - DATE = other36.DATE; - TIME = other36.TIME; - TIMESTAMP = other36.TIMESTAMP; - INTEGER = other36.INTEGER; - UNKNOWN = other36.UNKNOWN; - JSON = other36.JSON; - BSON = other36.BSON; - UUID = other36.UUID; - __isset = other36.__isset; -} -LogicalType& LogicalType::operator=(const LogicalType& other37) { - STRING = other37.STRING; - MAP = other37.MAP; - LIST = other37.LIST; - ENUM = other37.ENUM; - DECIMAL = other37.DECIMAL; - DATE = other37.DATE; - TIME = other37.TIME; - TIMESTAMP = other37.TIMESTAMP; - INTEGER = other37.INTEGER; - UNKNOWN = other37.UNKNOWN; - JSON = other37.JSON; - BSON = other37.BSON; - UUID = other37.UUID; - __isset = other37.__isset; - return *this; -} -void LogicalType::printTo(std::ostream& out) const { - using ::duckdb_apache::thrift::to_string; - out << "LogicalType("; - out << "STRING="; (__isset.STRING ? (out << to_string(STRING)) : (out << "")); - out << ", " << "MAP="; (__isset.MAP ? (out << to_string(MAP)) : (out << "")); - out << ", " << "LIST="; (__isset.LIST ? (out << to_string(LIST)) : (out << "")); - out << ", " << "ENUM="; (__isset.ENUM ? (out << to_string(ENUM)) : (out << "")); - out << ", " << "DECIMAL="; (__isset.DECIMAL ? (out << to_string(DECIMAL)) : (out << "")); - out << ", " << "DATE="; (__isset.DATE ? (out << to_string(DATE)) : (out << "")); - out << ", " << "TIME="; (__isset.TIME ? (out << to_string(TIME)) : (out << "")); - out << ", " << "TIMESTAMP="; (__isset.TIMESTAMP ? (out << to_string(TIMESTAMP)) : (out << "")); - out << ", " << "INTEGER="; (__isset.INTEGER ? (out << to_string(INTEGER)) : (out << "")); - out << ", " << "UNKNOWN="; (__isset.UNKNOWN ? (out << to_string(UNKNOWN)) : (out << "")); - out << ", " << "JSON="; (__isset.JSON ? (out << to_string(JSON)) : (out << "")); - out << ", " << "BSON="; (__isset.BSON ? (out << to_string(BSON)) : (out << "")); - out << ", " << "UUID="; (__isset.UUID ? (out << to_string(UUID)) : (out << "")); - out << ")"; -} - - -SchemaElement::~SchemaElement() throw() { -} - - -void SchemaElement::__set_type(const Type::type val) { - this->type = val; -__isset.type = true; -} - -void SchemaElement::__set_type_length(const int32_t val) { - this->type_length = val; -__isset.type_length = true; -} - -void SchemaElement::__set_repetition_type(const FieldRepetitionType::type val) { - this->repetition_type = val; -__isset.repetition_type = true; -} - -void SchemaElement::__set_name(const std::string& val) { - this->name = val; -} - -void SchemaElement::__set_num_children(const int32_t val) { - this->num_children = val; -__isset.num_children = true; -} - -void SchemaElement::__set_converted_type(const ConvertedType::type val) { - this->converted_type = val; -__isset.converted_type = true; -} - -void SchemaElement::__set_scale(const int32_t val) { - this->scale = val; -__isset.scale = true; -} - -void SchemaElement::__set_precision(const int32_t val) { - this->precision = val; -__isset.precision = true; -} - -void SchemaElement::__set_field_id(const int32_t val) { - this->field_id = val; -__isset.field_id = true; -} - -void SchemaElement::__set_logicalType(const LogicalType& val) { - this->logicalType = val; -__isset.logicalType = true; -} -std::ostream& operator<<(std::ostream& out, const SchemaElement& obj) -{ - obj.printTo(out); - return out; -} - - -uint32_t SchemaElement::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { - - ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::duckdb_apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::duckdb_apache::thrift::protocol::TProtocolException; - - bool isset_name = false; - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { - int32_t ecast38; - xfer += iprot->readI32(ecast38); - this->type = (Type::type)ecast38; - this->__isset.type = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->type_length); - this->__isset.type_length = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 3: - if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { - int32_t ecast39; - xfer += iprot->readI32(ecast39); - this->repetition_type = (FieldRepetitionType::type)ecast39; - this->__isset.repetition_type = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 4: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRING) { - xfer += iprot->readString(this->name); - isset_name = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 5: - if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->num_children); - this->__isset.num_children = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 6: - if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { - int32_t ecast40; - xfer += iprot->readI32(ecast40); - this->converted_type = (ConvertedType::type)ecast40; - this->__isset.converted_type = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 7: - if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->scale); - this->__isset.scale = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 8: - if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->precision); - this->__isset.precision = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 9: - if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->field_id); - this->__isset.field_id = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 10: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRUCT) { - xfer += this->logicalType.read(iprot); - this->__isset.logicalType = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - if (!isset_name) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; -} - -uint32_t SchemaElement::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("SchemaElement"); - - if (this->__isset.type) { - xfer += oprot->writeFieldBegin("type", ::duckdb_apache::thrift::protocol::T_I32, 1); - xfer += oprot->writeI32((int32_t)this->type); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.type_length) { - xfer += oprot->writeFieldBegin("type_length", ::duckdb_apache::thrift::protocol::T_I32, 2); - xfer += oprot->writeI32(this->type_length); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.repetition_type) { - xfer += oprot->writeFieldBegin("repetition_type", ::duckdb_apache::thrift::protocol::T_I32, 3); - xfer += oprot->writeI32((int32_t)this->repetition_type); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldBegin("name", ::duckdb_apache::thrift::protocol::T_STRING, 4); - xfer += oprot->writeString(this->name); - xfer += oprot->writeFieldEnd(); - - if (this->__isset.num_children) { - xfer += oprot->writeFieldBegin("num_children", ::duckdb_apache::thrift::protocol::T_I32, 5); - xfer += oprot->writeI32(this->num_children); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.converted_type) { - xfer += oprot->writeFieldBegin("converted_type", ::duckdb_apache::thrift::protocol::T_I32, 6); - xfer += oprot->writeI32((int32_t)this->converted_type); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.scale) { - xfer += oprot->writeFieldBegin("scale", ::duckdb_apache::thrift::protocol::T_I32, 7); - xfer += oprot->writeI32(this->scale); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.precision) { - xfer += oprot->writeFieldBegin("precision", ::duckdb_apache::thrift::protocol::T_I32, 8); - xfer += oprot->writeI32(this->precision); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.field_id) { - xfer += oprot->writeFieldBegin("field_id", ::duckdb_apache::thrift::protocol::T_I32, 9); - xfer += oprot->writeI32(this->field_id); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.logicalType) { - xfer += oprot->writeFieldBegin("logicalType", ::duckdb_apache::thrift::protocol::T_STRUCT, 10); - xfer += this->logicalType.write(oprot); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - -void swap(SchemaElement &a, SchemaElement &b) { - using ::std::swap; - swap(a.type, b.type); - swap(a.type_length, b.type_length); - swap(a.repetition_type, b.repetition_type); - swap(a.name, b.name); - swap(a.num_children, b.num_children); - swap(a.converted_type, b.converted_type); - swap(a.scale, b.scale); - swap(a.precision, b.precision); - swap(a.field_id, b.field_id); - swap(a.logicalType, b.logicalType); - swap(a.__isset, b.__isset); -} - -SchemaElement::SchemaElement(const SchemaElement& other41) { - type = other41.type; - type_length = other41.type_length; - repetition_type = other41.repetition_type; - name = other41.name; - num_children = other41.num_children; - converted_type = other41.converted_type; - scale = other41.scale; - precision = other41.precision; - field_id = other41.field_id; - logicalType = other41.logicalType; - __isset = other41.__isset; -} -SchemaElement& SchemaElement::operator=(const SchemaElement& other42) { - type = other42.type; - type_length = other42.type_length; - repetition_type = other42.repetition_type; - name = other42.name; - num_children = other42.num_children; - converted_type = other42.converted_type; - scale = other42.scale; - precision = other42.precision; - field_id = other42.field_id; - logicalType = other42.logicalType; - __isset = other42.__isset; - return *this; -} -void SchemaElement::printTo(std::ostream& out) const { - using ::duckdb_apache::thrift::to_string; - out << "SchemaElement("; - out << "type="; (__isset.type ? (out << to_string(type)) : (out << "")); - out << ", " << "type_length="; (__isset.type_length ? (out << to_string(type_length)) : (out << "")); - out << ", " << "repetition_type="; (__isset.repetition_type ? (out << to_string(repetition_type)) : (out << "")); - out << ", " << "name=" << to_string(name); - out << ", " << "num_children="; (__isset.num_children ? (out << to_string(num_children)) : (out << "")); - out << ", " << "converted_type="; (__isset.converted_type ? (out << to_string(converted_type)) : (out << "")); - out << ", " << "scale="; (__isset.scale ? (out << to_string(scale)) : (out << "")); - out << ", " << "precision="; (__isset.precision ? (out << to_string(precision)) : (out << "")); - out << ", " << "field_id="; (__isset.field_id ? (out << to_string(field_id)) : (out << "")); - out << ", " << "logicalType="; (__isset.logicalType ? (out << to_string(logicalType)) : (out << "")); - out << ")"; -} - - -DataPageHeader::~DataPageHeader() throw() { -} - - -void DataPageHeader::__set_num_values(const int32_t val) { - this->num_values = val; -} - -void DataPageHeader::__set_encoding(const Encoding::type val) { - this->encoding = val; -} - -void DataPageHeader::__set_definition_level_encoding(const Encoding::type val) { - this->definition_level_encoding = val; -} - -void DataPageHeader::__set_repetition_level_encoding(const Encoding::type val) { - this->repetition_level_encoding = val; -} - -void DataPageHeader::__set_statistics(const Statistics& val) { - this->statistics = val; -__isset.statistics = true; -} -std::ostream& operator<<(std::ostream& out, const DataPageHeader& obj) -{ - obj.printTo(out); - return out; -} - - -uint32_t DataPageHeader::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { - - ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::duckdb_apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::duckdb_apache::thrift::protocol::TProtocolException; - - bool isset_num_values = false; - bool isset_encoding = false; - bool isset_definition_level_encoding = false; - bool isset_repetition_level_encoding = false; - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->num_values); - isset_num_values = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { - int32_t ecast43; - xfer += iprot->readI32(ecast43); - this->encoding = (Encoding::type)ecast43; - isset_encoding = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 3: - if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { - int32_t ecast44; - xfer += iprot->readI32(ecast44); - this->definition_level_encoding = (Encoding::type)ecast44; - isset_definition_level_encoding = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 4: - if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { - int32_t ecast45; - xfer += iprot->readI32(ecast45); - this->repetition_level_encoding = (Encoding::type)ecast45; - isset_repetition_level_encoding = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 5: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRUCT) { - xfer += this->statistics.read(iprot); - this->__isset.statistics = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - if (!isset_num_values) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_encoding) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_definition_level_encoding) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_repetition_level_encoding) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; -} - -uint32_t DataPageHeader::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("DataPageHeader"); - - xfer += oprot->writeFieldBegin("num_values", ::duckdb_apache::thrift::protocol::T_I32, 1); - xfer += oprot->writeI32(this->num_values); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("encoding", ::duckdb_apache::thrift::protocol::T_I32, 2); - xfer += oprot->writeI32((int32_t)this->encoding); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("definition_level_encoding", ::duckdb_apache::thrift::protocol::T_I32, 3); - xfer += oprot->writeI32((int32_t)this->definition_level_encoding); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("repetition_level_encoding", ::duckdb_apache::thrift::protocol::T_I32, 4); - xfer += oprot->writeI32((int32_t)this->repetition_level_encoding); - xfer += oprot->writeFieldEnd(); - - if (this->__isset.statistics) { - xfer += oprot->writeFieldBegin("statistics", ::duckdb_apache::thrift::protocol::T_STRUCT, 5); - xfer += this->statistics.write(oprot); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - -void swap(DataPageHeader &a, DataPageHeader &b) { - using ::std::swap; - swap(a.num_values, b.num_values); - swap(a.encoding, b.encoding); - swap(a.definition_level_encoding, b.definition_level_encoding); - swap(a.repetition_level_encoding, b.repetition_level_encoding); - swap(a.statistics, b.statistics); - swap(a.__isset, b.__isset); -} - -DataPageHeader::DataPageHeader(const DataPageHeader& other46) { - num_values = other46.num_values; - encoding = other46.encoding; - definition_level_encoding = other46.definition_level_encoding; - repetition_level_encoding = other46.repetition_level_encoding; - statistics = other46.statistics; - __isset = other46.__isset; -} -DataPageHeader& DataPageHeader::operator=(const DataPageHeader& other47) { - num_values = other47.num_values; - encoding = other47.encoding; - definition_level_encoding = other47.definition_level_encoding; - repetition_level_encoding = other47.repetition_level_encoding; - statistics = other47.statistics; - __isset = other47.__isset; - return *this; -} -void DataPageHeader::printTo(std::ostream& out) const { - using ::duckdb_apache::thrift::to_string; - out << "DataPageHeader("; - out << "num_values=" << to_string(num_values); - out << ", " << "encoding=" << to_string(encoding); - out << ", " << "definition_level_encoding=" << to_string(definition_level_encoding); - out << ", " << "repetition_level_encoding=" << to_string(repetition_level_encoding); - out << ", " << "statistics="; (__isset.statistics ? (out << to_string(statistics)) : (out << "")); - out << ")"; -} - - -IndexPageHeader::~IndexPageHeader() throw() { -} - -std::ostream& operator<<(std::ostream& out, const IndexPageHeader& obj) -{ - obj.printTo(out); - return out; -} - - -uint32_t IndexPageHeader::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { - - ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::duckdb_apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::duckdb_apache::thrift::protocol::TProtocolException; - - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { - break; - } - xfer += iprot->skip(ftype); - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t IndexPageHeader::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("IndexPageHeader"); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - -void swap(IndexPageHeader &a, IndexPageHeader &b) { - using ::std::swap; - (void) a; - (void) b; -} - -IndexPageHeader::IndexPageHeader(const IndexPageHeader& other48) { - (void) other48; -} -IndexPageHeader& IndexPageHeader::operator=(const IndexPageHeader& other49) { - (void) other49; - return *this; -} -void IndexPageHeader::printTo(std::ostream& out) const { - using ::duckdb_apache::thrift::to_string; - out << "IndexPageHeader("; - out << ")"; -} - - -DictionaryPageHeader::~DictionaryPageHeader() throw() { -} - - -void DictionaryPageHeader::__set_num_values(const int32_t val) { - this->num_values = val; -} - -void DictionaryPageHeader::__set_encoding(const Encoding::type val) { - this->encoding = val; -} - -void DictionaryPageHeader::__set_is_sorted(const bool val) { - this->is_sorted = val; -__isset.is_sorted = true; -} -std::ostream& operator<<(std::ostream& out, const DictionaryPageHeader& obj) -{ - obj.printTo(out); - return out; -} - - -uint32_t DictionaryPageHeader::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { - - ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::duckdb_apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::duckdb_apache::thrift::protocol::TProtocolException; - - bool isset_num_values = false; - bool isset_encoding = false; - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->num_values); - isset_num_values = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { - int32_t ecast50; - xfer += iprot->readI32(ecast50); - this->encoding = (Encoding::type)ecast50; - isset_encoding = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 3: - if (ftype == ::duckdb_apache::thrift::protocol::T_BOOL) { - xfer += iprot->readBool(this->is_sorted); - this->__isset.is_sorted = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - if (!isset_num_values) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_encoding) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; -} - -uint32_t DictionaryPageHeader::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("DictionaryPageHeader"); - - xfer += oprot->writeFieldBegin("num_values", ::duckdb_apache::thrift::protocol::T_I32, 1); - xfer += oprot->writeI32(this->num_values); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("encoding", ::duckdb_apache::thrift::protocol::T_I32, 2); - xfer += oprot->writeI32((int32_t)this->encoding); - xfer += oprot->writeFieldEnd(); - - if (this->__isset.is_sorted) { - xfer += oprot->writeFieldBegin("is_sorted", ::duckdb_apache::thrift::protocol::T_BOOL, 3); - xfer += oprot->writeBool(this->is_sorted); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - -void swap(DictionaryPageHeader &a, DictionaryPageHeader &b) { - using ::std::swap; - swap(a.num_values, b.num_values); - swap(a.encoding, b.encoding); - swap(a.is_sorted, b.is_sorted); - swap(a.__isset, b.__isset); -} - -DictionaryPageHeader::DictionaryPageHeader(const DictionaryPageHeader& other51) { - num_values = other51.num_values; - encoding = other51.encoding; - is_sorted = other51.is_sorted; - __isset = other51.__isset; -} -DictionaryPageHeader& DictionaryPageHeader::operator=(const DictionaryPageHeader& other52) { - num_values = other52.num_values; - encoding = other52.encoding; - is_sorted = other52.is_sorted; - __isset = other52.__isset; - return *this; -} -void DictionaryPageHeader::printTo(std::ostream& out) const { - using ::duckdb_apache::thrift::to_string; - out << "DictionaryPageHeader("; - out << "num_values=" << to_string(num_values); - out << ", " << "encoding=" << to_string(encoding); - out << ", " << "is_sorted="; (__isset.is_sorted ? (out << to_string(is_sorted)) : (out << "")); - out << ")"; -} - - -DataPageHeaderV2::~DataPageHeaderV2() throw() { -} - - -void DataPageHeaderV2::__set_num_values(const int32_t val) { - this->num_values = val; -} - -void DataPageHeaderV2::__set_num_nulls(const int32_t val) { - this->num_nulls = val; -} - -void DataPageHeaderV2::__set_num_rows(const int32_t val) { - this->num_rows = val; -} - -void DataPageHeaderV2::__set_encoding(const Encoding::type val) { - this->encoding = val; -} - -void DataPageHeaderV2::__set_definition_levels_byte_length(const int32_t val) { - this->definition_levels_byte_length = val; -} - -void DataPageHeaderV2::__set_repetition_levels_byte_length(const int32_t val) { - this->repetition_levels_byte_length = val; -} - -void DataPageHeaderV2::__set_is_compressed(const bool val) { - this->is_compressed = val; -__isset.is_compressed = true; -} - -void DataPageHeaderV2::__set_statistics(const Statistics& val) { - this->statistics = val; -__isset.statistics = true; -} -std::ostream& operator<<(std::ostream& out, const DataPageHeaderV2& obj) -{ - obj.printTo(out); - return out; -} - - -uint32_t DataPageHeaderV2::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { - - ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::duckdb_apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::duckdb_apache::thrift::protocol::TProtocolException; - - bool isset_num_values = false; - bool isset_num_nulls = false; - bool isset_num_rows = false; - bool isset_encoding = false; - bool isset_definition_levels_byte_length = false; - bool isset_repetition_levels_byte_length = false; - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->num_values); - isset_num_values = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->num_nulls); - isset_num_nulls = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 3: - if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->num_rows); - isset_num_rows = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 4: - if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { - int32_t ecast53; - xfer += iprot->readI32(ecast53); - this->encoding = (Encoding::type)ecast53; - isset_encoding = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 5: - if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->definition_levels_byte_length); - isset_definition_levels_byte_length = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 6: - if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->repetition_levels_byte_length); - isset_repetition_levels_byte_length = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 7: - if (ftype == ::duckdb_apache::thrift::protocol::T_BOOL) { - xfer += iprot->readBool(this->is_compressed); - this->__isset.is_compressed = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 8: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRUCT) { - xfer += this->statistics.read(iprot); - this->__isset.statistics = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - if (!isset_num_values) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_num_nulls) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_num_rows) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_encoding) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_definition_levels_byte_length) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_repetition_levels_byte_length) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; -} - -uint32_t DataPageHeaderV2::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("DataPageHeaderV2"); - - xfer += oprot->writeFieldBegin("num_values", ::duckdb_apache::thrift::protocol::T_I32, 1); - xfer += oprot->writeI32(this->num_values); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("num_nulls", ::duckdb_apache::thrift::protocol::T_I32, 2); - xfer += oprot->writeI32(this->num_nulls); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("num_rows", ::duckdb_apache::thrift::protocol::T_I32, 3); - xfer += oprot->writeI32(this->num_rows); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("encoding", ::duckdb_apache::thrift::protocol::T_I32, 4); - xfer += oprot->writeI32((int32_t)this->encoding); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("definition_levels_byte_length", ::duckdb_apache::thrift::protocol::T_I32, 5); - xfer += oprot->writeI32(this->definition_levels_byte_length); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("repetition_levels_byte_length", ::duckdb_apache::thrift::protocol::T_I32, 6); - xfer += oprot->writeI32(this->repetition_levels_byte_length); - xfer += oprot->writeFieldEnd(); - - if (this->__isset.is_compressed) { - xfer += oprot->writeFieldBegin("is_compressed", ::duckdb_apache::thrift::protocol::T_BOOL, 7); - xfer += oprot->writeBool(this->is_compressed); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.statistics) { - xfer += oprot->writeFieldBegin("statistics", ::duckdb_apache::thrift::protocol::T_STRUCT, 8); - xfer += this->statistics.write(oprot); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - -void swap(DataPageHeaderV2 &a, DataPageHeaderV2 &b) { - using ::std::swap; - swap(a.num_values, b.num_values); - swap(a.num_nulls, b.num_nulls); - swap(a.num_rows, b.num_rows); - swap(a.encoding, b.encoding); - swap(a.definition_levels_byte_length, b.definition_levels_byte_length); - swap(a.repetition_levels_byte_length, b.repetition_levels_byte_length); - swap(a.is_compressed, b.is_compressed); - swap(a.statistics, b.statistics); - swap(a.__isset, b.__isset); -} - -DataPageHeaderV2::DataPageHeaderV2(const DataPageHeaderV2& other54) { - num_values = other54.num_values; - num_nulls = other54.num_nulls; - num_rows = other54.num_rows; - encoding = other54.encoding; - definition_levels_byte_length = other54.definition_levels_byte_length; - repetition_levels_byte_length = other54.repetition_levels_byte_length; - is_compressed = other54.is_compressed; - statistics = other54.statistics; - __isset = other54.__isset; -} -DataPageHeaderV2& DataPageHeaderV2::operator=(const DataPageHeaderV2& other55) { - num_values = other55.num_values; - num_nulls = other55.num_nulls; - num_rows = other55.num_rows; - encoding = other55.encoding; - definition_levels_byte_length = other55.definition_levels_byte_length; - repetition_levels_byte_length = other55.repetition_levels_byte_length; - is_compressed = other55.is_compressed; - statistics = other55.statistics; - __isset = other55.__isset; - return *this; -} -void DataPageHeaderV2::printTo(std::ostream& out) const { - using ::duckdb_apache::thrift::to_string; - out << "DataPageHeaderV2("; - out << "num_values=" << to_string(num_values); - out << ", " << "num_nulls=" << to_string(num_nulls); - out << ", " << "num_rows=" << to_string(num_rows); - out << ", " << "encoding=" << to_string(encoding); - out << ", " << "definition_levels_byte_length=" << to_string(definition_levels_byte_length); - out << ", " << "repetition_levels_byte_length=" << to_string(repetition_levels_byte_length); - out << ", " << "is_compressed="; (__isset.is_compressed ? (out << to_string(is_compressed)) : (out << "")); - out << ", " << "statistics="; (__isset.statistics ? (out << to_string(statistics)) : (out << "")); - out << ")"; -} - - -PageHeader::~PageHeader() throw() { -} - - -void PageHeader::__set_type(const PageType::type val) { - this->type = val; -} - -void PageHeader::__set_uncompressed_page_size(const int32_t val) { - this->uncompressed_page_size = val; -} - -void PageHeader::__set_compressed_page_size(const int32_t val) { - this->compressed_page_size = val; -} - -void PageHeader::__set_crc(const int32_t val) { - this->crc = val; -__isset.crc = true; -} - -void PageHeader::__set_data_page_header(const DataPageHeader& val) { - this->data_page_header = val; -__isset.data_page_header = true; -} - -void PageHeader::__set_index_page_header(const IndexPageHeader& val) { - this->index_page_header = val; -__isset.index_page_header = true; -} - -void PageHeader::__set_dictionary_page_header(const DictionaryPageHeader& val) { - this->dictionary_page_header = val; -__isset.dictionary_page_header = true; -} - -void PageHeader::__set_data_page_header_v2(const DataPageHeaderV2& val) { - this->data_page_header_v2 = val; -__isset.data_page_header_v2 = true; -} -std::ostream& operator<<(std::ostream& out, const PageHeader& obj) -{ - obj.printTo(out); - return out; -} - - -uint32_t PageHeader::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { - - ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::duckdb_apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::duckdb_apache::thrift::protocol::TProtocolException; - - bool isset_type = false; - bool isset_uncompressed_page_size = false; - bool isset_compressed_page_size = false; - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { - int32_t ecast56; - xfer += iprot->readI32(ecast56); - this->type = (PageType::type)ecast56; - isset_type = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->uncompressed_page_size); - isset_uncompressed_page_size = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 3: - if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->compressed_page_size); - isset_compressed_page_size = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 4: - if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->crc); - this->__isset.crc = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 5: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRUCT) { - xfer += this->data_page_header.read(iprot); - this->__isset.data_page_header = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 6: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRUCT) { - xfer += this->index_page_header.read(iprot); - this->__isset.index_page_header = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 7: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRUCT) { - xfer += this->dictionary_page_header.read(iprot); - this->__isset.dictionary_page_header = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 8: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRUCT) { - xfer += this->data_page_header_v2.read(iprot); - this->__isset.data_page_header_v2 = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - if (!isset_type) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_uncompressed_page_size) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_compressed_page_size) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; -} - -uint32_t PageHeader::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("PageHeader"); - - xfer += oprot->writeFieldBegin("type", ::duckdb_apache::thrift::protocol::T_I32, 1); - xfer += oprot->writeI32((int32_t)this->type); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("uncompressed_page_size", ::duckdb_apache::thrift::protocol::T_I32, 2); - xfer += oprot->writeI32(this->uncompressed_page_size); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("compressed_page_size", ::duckdb_apache::thrift::protocol::T_I32, 3); - xfer += oprot->writeI32(this->compressed_page_size); - xfer += oprot->writeFieldEnd(); - - if (this->__isset.crc) { - xfer += oprot->writeFieldBegin("crc", ::duckdb_apache::thrift::protocol::T_I32, 4); - xfer += oprot->writeI32(this->crc); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.data_page_header) { - xfer += oprot->writeFieldBegin("data_page_header", ::duckdb_apache::thrift::protocol::T_STRUCT, 5); - xfer += this->data_page_header.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.index_page_header) { - xfer += oprot->writeFieldBegin("index_page_header", ::duckdb_apache::thrift::protocol::T_STRUCT, 6); - xfer += this->index_page_header.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.dictionary_page_header) { - xfer += oprot->writeFieldBegin("dictionary_page_header", ::duckdb_apache::thrift::protocol::T_STRUCT, 7); - xfer += this->dictionary_page_header.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.data_page_header_v2) { - xfer += oprot->writeFieldBegin("data_page_header_v2", ::duckdb_apache::thrift::protocol::T_STRUCT, 8); - xfer += this->data_page_header_v2.write(oprot); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - -void swap(PageHeader &a, PageHeader &b) { - using ::std::swap; - swap(a.type, b.type); - swap(a.uncompressed_page_size, b.uncompressed_page_size); - swap(a.compressed_page_size, b.compressed_page_size); - swap(a.crc, b.crc); - swap(a.data_page_header, b.data_page_header); - swap(a.index_page_header, b.index_page_header); - swap(a.dictionary_page_header, b.dictionary_page_header); - swap(a.data_page_header_v2, b.data_page_header_v2); - swap(a.__isset, b.__isset); -} - -PageHeader::PageHeader(const PageHeader& other57) { - type = other57.type; - uncompressed_page_size = other57.uncompressed_page_size; - compressed_page_size = other57.compressed_page_size; - crc = other57.crc; - data_page_header = other57.data_page_header; - index_page_header = other57.index_page_header; - dictionary_page_header = other57.dictionary_page_header; - data_page_header_v2 = other57.data_page_header_v2; - __isset = other57.__isset; -} -PageHeader& PageHeader::operator=(const PageHeader& other58) { - type = other58.type; - uncompressed_page_size = other58.uncompressed_page_size; - compressed_page_size = other58.compressed_page_size; - crc = other58.crc; - data_page_header = other58.data_page_header; - index_page_header = other58.index_page_header; - dictionary_page_header = other58.dictionary_page_header; - data_page_header_v2 = other58.data_page_header_v2; - __isset = other58.__isset; - return *this; -} -void PageHeader::printTo(std::ostream& out) const { - using ::duckdb_apache::thrift::to_string; - out << "PageHeader("; - out << "type=" << to_string(type); - out << ", " << "uncompressed_page_size=" << to_string(uncompressed_page_size); - out << ", " << "compressed_page_size=" << to_string(compressed_page_size); - out << ", " << "crc="; (__isset.crc ? (out << to_string(crc)) : (out << "")); - out << ", " << "data_page_header="; (__isset.data_page_header ? (out << to_string(data_page_header)) : (out << "")); - out << ", " << "index_page_header="; (__isset.index_page_header ? (out << to_string(index_page_header)) : (out << "")); - out << ", " << "dictionary_page_header="; (__isset.dictionary_page_header ? (out << to_string(dictionary_page_header)) : (out << "")); - out << ", " << "data_page_header_v2="; (__isset.data_page_header_v2 ? (out << to_string(data_page_header_v2)) : (out << "")); - out << ")"; -} - - -KeyValue::~KeyValue() throw() { -} - - -void KeyValue::__set_key(const std::string& val) { - this->key = val; -} - -void KeyValue::__set_value(const std::string& val) { - this->value = val; -__isset.value = true; -} -std::ostream& operator<<(std::ostream& out, const KeyValue& obj) -{ - obj.printTo(out); - return out; -} - - -uint32_t KeyValue::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { - - ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::duckdb_apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::duckdb_apache::thrift::protocol::TProtocolException; - - bool isset_key = false; - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRING) { - xfer += iprot->readString(this->key); - isset_key = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRING) { - xfer += iprot->readString(this->value); - this->__isset.value = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - if (!isset_key) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; -} - -uint32_t KeyValue::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("KeyValue"); - - xfer += oprot->writeFieldBegin("key", ::duckdb_apache::thrift::protocol::T_STRING, 1); - xfer += oprot->writeString(this->key); - xfer += oprot->writeFieldEnd(); - - if (this->__isset.value) { - xfer += oprot->writeFieldBegin("value", ::duckdb_apache::thrift::protocol::T_STRING, 2); - xfer += oprot->writeString(this->value); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - -void swap(KeyValue &a, KeyValue &b) { - using ::std::swap; - swap(a.key, b.key); - swap(a.value, b.value); - swap(a.__isset, b.__isset); -} - -KeyValue::KeyValue(const KeyValue& other59) { - key = other59.key; - value = other59.value; - __isset = other59.__isset; -} -KeyValue& KeyValue::operator=(const KeyValue& other60) { - key = other60.key; - value = other60.value; - __isset = other60.__isset; - return *this; -} -void KeyValue::printTo(std::ostream& out) const { - using ::duckdb_apache::thrift::to_string; - out << "KeyValue("; - out << "key=" << to_string(key); - out << ", " << "value="; (__isset.value ? (out << to_string(value)) : (out << "")); - out << ")"; -} - - -SortingColumn::~SortingColumn() throw() { -} - - -void SortingColumn::__set_column_idx(const int32_t val) { - this->column_idx = val; -} - -void SortingColumn::__set_descending(const bool val) { - this->descending = val; -} - -void SortingColumn::__set_nulls_first(const bool val) { - this->nulls_first = val; -} -std::ostream& operator<<(std::ostream& out, const SortingColumn& obj) -{ - obj.printTo(out); - return out; -} - - -uint32_t SortingColumn::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { - - ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::duckdb_apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::duckdb_apache::thrift::protocol::TProtocolException; - - bool isset_column_idx = false; - bool isset_descending = false; - bool isset_nulls_first = false; - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->column_idx); - isset_column_idx = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::duckdb_apache::thrift::protocol::T_BOOL) { - xfer += iprot->readBool(this->descending); - isset_descending = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 3: - if (ftype == ::duckdb_apache::thrift::protocol::T_BOOL) { - xfer += iprot->readBool(this->nulls_first); - isset_nulls_first = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - if (!isset_column_idx) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_descending) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_nulls_first) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; -} - -uint32_t SortingColumn::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("SortingColumn"); - - xfer += oprot->writeFieldBegin("column_idx", ::duckdb_apache::thrift::protocol::T_I32, 1); - xfer += oprot->writeI32(this->column_idx); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("descending", ::duckdb_apache::thrift::protocol::T_BOOL, 2); - xfer += oprot->writeBool(this->descending); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("nulls_first", ::duckdb_apache::thrift::protocol::T_BOOL, 3); - xfer += oprot->writeBool(this->nulls_first); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - -void swap(SortingColumn &a, SortingColumn &b) { - using ::std::swap; - swap(a.column_idx, b.column_idx); - swap(a.descending, b.descending); - swap(a.nulls_first, b.nulls_first); -} - -SortingColumn::SortingColumn(const SortingColumn& other61) { - column_idx = other61.column_idx; - descending = other61.descending; - nulls_first = other61.nulls_first; -} -SortingColumn& SortingColumn::operator=(const SortingColumn& other62) { - column_idx = other62.column_idx; - descending = other62.descending; - nulls_first = other62.nulls_first; - return *this; -} -void SortingColumn::printTo(std::ostream& out) const { - using ::duckdb_apache::thrift::to_string; - out << "SortingColumn("; - out << "column_idx=" << to_string(column_idx); - out << ", " << "descending=" << to_string(descending); - out << ", " << "nulls_first=" << to_string(nulls_first); - out << ")"; -} - - -PageEncodingStats::~PageEncodingStats() throw() { -} - - -void PageEncodingStats::__set_page_type(const PageType::type val) { - this->page_type = val; -} - -void PageEncodingStats::__set_encoding(const Encoding::type val) { - this->encoding = val; -} - -void PageEncodingStats::__set_count(const int32_t val) { - this->count = val; -} -std::ostream& operator<<(std::ostream& out, const PageEncodingStats& obj) -{ - obj.printTo(out); - return out; -} - - -uint32_t PageEncodingStats::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { - - ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::duckdb_apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::duckdb_apache::thrift::protocol::TProtocolException; - - bool isset_page_type = false; - bool isset_encoding = false; - bool isset_count = false; - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { - int32_t ecast63; - xfer += iprot->readI32(ecast63); - this->page_type = (PageType::type)ecast63; - isset_page_type = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { - int32_t ecast64; - xfer += iprot->readI32(ecast64); - this->encoding = (Encoding::type)ecast64; - isset_encoding = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 3: - if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->count); - isset_count = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - if (!isset_page_type) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_encoding) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_count) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; -} - -uint32_t PageEncodingStats::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("PageEncodingStats"); - - xfer += oprot->writeFieldBegin("page_type", ::duckdb_apache::thrift::protocol::T_I32, 1); - xfer += oprot->writeI32((int32_t)this->page_type); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("encoding", ::duckdb_apache::thrift::protocol::T_I32, 2); - xfer += oprot->writeI32((int32_t)this->encoding); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("count", ::duckdb_apache::thrift::protocol::T_I32, 3); - xfer += oprot->writeI32(this->count); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - -void swap(PageEncodingStats &a, PageEncodingStats &b) { - using ::std::swap; - swap(a.page_type, b.page_type); - swap(a.encoding, b.encoding); - swap(a.count, b.count); -} - -PageEncodingStats::PageEncodingStats(const PageEncodingStats& other65) { - page_type = other65.page_type; - encoding = other65.encoding; - count = other65.count; -} -PageEncodingStats& PageEncodingStats::operator=(const PageEncodingStats& other66) { - page_type = other66.page_type; - encoding = other66.encoding; - count = other66.count; - return *this; -} -void PageEncodingStats::printTo(std::ostream& out) const { - using ::duckdb_apache::thrift::to_string; - out << "PageEncodingStats("; - out << "page_type=" << to_string(page_type); - out << ", " << "encoding=" << to_string(encoding); - out << ", " << "count=" << to_string(count); - out << ")"; -} - - -ColumnMetaData::~ColumnMetaData() throw() { -} - - -void ColumnMetaData::__set_type(const Type::type val) { - this->type = val; -} - -void ColumnMetaData::__set_encodings(const std::vector & val) { - this->encodings = val; -} - -void ColumnMetaData::__set_path_in_schema(const std::vector & val) { - this->path_in_schema = val; -} - -void ColumnMetaData::__set_codec(const CompressionCodec::type val) { - this->codec = val; -} - -void ColumnMetaData::__set_num_values(const int64_t val) { - this->num_values = val; -} - -void ColumnMetaData::__set_total_uncompressed_size(const int64_t val) { - this->total_uncompressed_size = val; -} - -void ColumnMetaData::__set_total_compressed_size(const int64_t val) { - this->total_compressed_size = val; -} - -void ColumnMetaData::__set_key_value_metadata(const std::vector & val) { - this->key_value_metadata = val; -__isset.key_value_metadata = true; -} - -void ColumnMetaData::__set_data_page_offset(const int64_t val) { - this->data_page_offset = val; -} - -void ColumnMetaData::__set_index_page_offset(const int64_t val) { - this->index_page_offset = val; -__isset.index_page_offset = true; -} - -void ColumnMetaData::__set_dictionary_page_offset(const int64_t val) { - this->dictionary_page_offset = val; -__isset.dictionary_page_offset = true; -} - -void ColumnMetaData::__set_statistics(const Statistics& val) { - this->statistics = val; -__isset.statistics = true; -} - -void ColumnMetaData::__set_encoding_stats(const std::vector & val) { - this->encoding_stats = val; -__isset.encoding_stats = true; -} -std::ostream& operator<<(std::ostream& out, const ColumnMetaData& obj) -{ - obj.printTo(out); - return out; -} - - -uint32_t ColumnMetaData::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { - - ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::duckdb_apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::duckdb_apache::thrift::protocol::TProtocolException; - - bool isset_type = false; - bool isset_encodings = false; - bool isset_path_in_schema = false; - bool isset_codec = false; - bool isset_num_values = false; - bool isset_total_uncompressed_size = false; - bool isset_total_compressed_size = false; - bool isset_data_page_offset = false; - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { - int32_t ecast67; - xfer += iprot->readI32(ecast67); - this->type = (Type::type)ecast67; - isset_type = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::duckdb_apache::thrift::protocol::T_LIST) { - { - this->encodings.clear(); - uint32_t _size68; - ::duckdb_apache::thrift::protocol::TType _etype71; - xfer += iprot->readListBegin(_etype71, _size68); - this->encodings.resize(_size68); - uint32_t _i72; - for (_i72 = 0; _i72 < _size68; ++_i72) - { - int32_t ecast73; - xfer += iprot->readI32(ecast73); - this->encodings[_i72] = (Encoding::type)ecast73; - } - xfer += iprot->readListEnd(); - } - isset_encodings = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 3: - if (ftype == ::duckdb_apache::thrift::protocol::T_LIST) { - { - this->path_in_schema.clear(); - uint32_t _size74; - ::duckdb_apache::thrift::protocol::TType _etype77; - xfer += iprot->readListBegin(_etype77, _size74); - this->path_in_schema.resize(_size74); - uint32_t _i78; - for (_i78 = 0; _i78 < _size74; ++_i78) - { - xfer += iprot->readString(this->path_in_schema[_i78]); - } - xfer += iprot->readListEnd(); - } - isset_path_in_schema = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 4: - if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { - int32_t ecast79; - xfer += iprot->readI32(ecast79); - this->codec = (CompressionCodec::type)ecast79; - isset_codec = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 5: - if (ftype == ::duckdb_apache::thrift::protocol::T_I64) { - xfer += iprot->readI64(this->num_values); - isset_num_values = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 6: - if (ftype == ::duckdb_apache::thrift::protocol::T_I64) { - xfer += iprot->readI64(this->total_uncompressed_size); - isset_total_uncompressed_size = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 7: - if (ftype == ::duckdb_apache::thrift::protocol::T_I64) { - xfer += iprot->readI64(this->total_compressed_size); - isset_total_compressed_size = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 8: - if (ftype == ::duckdb_apache::thrift::protocol::T_LIST) { - { - this->key_value_metadata.clear(); - uint32_t _size80; - ::duckdb_apache::thrift::protocol::TType _etype83; - xfer += iprot->readListBegin(_etype83, _size80); - this->key_value_metadata.resize(_size80); - uint32_t _i84; - for (_i84 = 0; _i84 < _size80; ++_i84) - { - xfer += this->key_value_metadata[_i84].read(iprot); - } - xfer += iprot->readListEnd(); - } - this->__isset.key_value_metadata = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 9: - if (ftype == ::duckdb_apache::thrift::protocol::T_I64) { - xfer += iprot->readI64(this->data_page_offset); - isset_data_page_offset = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 10: - if (ftype == ::duckdb_apache::thrift::protocol::T_I64) { - xfer += iprot->readI64(this->index_page_offset); - this->__isset.index_page_offset = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 11: - if (ftype == ::duckdb_apache::thrift::protocol::T_I64) { - xfer += iprot->readI64(this->dictionary_page_offset); - this->__isset.dictionary_page_offset = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 12: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRUCT) { - xfer += this->statistics.read(iprot); - this->__isset.statistics = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 13: - if (ftype == ::duckdb_apache::thrift::protocol::T_LIST) { - { - this->encoding_stats.clear(); - uint32_t _size85; - ::duckdb_apache::thrift::protocol::TType _etype88; - xfer += iprot->readListBegin(_etype88, _size85); - this->encoding_stats.resize(_size85); - uint32_t _i89; - for (_i89 = 0; _i89 < _size85; ++_i89) - { - xfer += this->encoding_stats[_i89].read(iprot); - } - xfer += iprot->readListEnd(); - } - this->__isset.encoding_stats = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - if (!isset_type) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_encodings) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_path_in_schema) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_codec) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_num_values) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_total_uncompressed_size) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_total_compressed_size) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_data_page_offset) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; -} - -uint32_t ColumnMetaData::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("ColumnMetaData"); - - xfer += oprot->writeFieldBegin("type", ::duckdb_apache::thrift::protocol::T_I32, 1); - xfer += oprot->writeI32((int32_t)this->type); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("encodings", ::duckdb_apache::thrift::protocol::T_LIST, 2); - { - xfer += oprot->writeListBegin(::duckdb_apache::thrift::protocol::T_I32, static_cast(this->encodings.size())); - std::vector ::const_iterator _iter90; - for (_iter90 = this->encodings.begin(); _iter90 != this->encodings.end(); ++_iter90) - { - xfer += oprot->writeI32((int32_t)(*_iter90)); - } - xfer += oprot->writeListEnd(); - } - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("path_in_schema", ::duckdb_apache::thrift::protocol::T_LIST, 3); - { - xfer += oprot->writeListBegin(::duckdb_apache::thrift::protocol::T_STRING, static_cast(this->path_in_schema.size())); - std::vector ::const_iterator _iter91; - for (_iter91 = this->path_in_schema.begin(); _iter91 != this->path_in_schema.end(); ++_iter91) - { - xfer += oprot->writeString((*_iter91)); - } - xfer += oprot->writeListEnd(); - } - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("codec", ::duckdb_apache::thrift::protocol::T_I32, 4); - xfer += oprot->writeI32((int32_t)this->codec); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("num_values", ::duckdb_apache::thrift::protocol::T_I64, 5); - xfer += oprot->writeI64(this->num_values); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("total_uncompressed_size", ::duckdb_apache::thrift::protocol::T_I64, 6); - xfer += oprot->writeI64(this->total_uncompressed_size); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("total_compressed_size", ::duckdb_apache::thrift::protocol::T_I64, 7); - xfer += oprot->writeI64(this->total_compressed_size); - xfer += oprot->writeFieldEnd(); - - if (this->__isset.key_value_metadata) { - xfer += oprot->writeFieldBegin("key_value_metadata", ::duckdb_apache::thrift::protocol::T_LIST, 8); - { - xfer += oprot->writeListBegin(::duckdb_apache::thrift::protocol::T_STRUCT, static_cast(this->key_value_metadata.size())); - std::vector ::const_iterator _iter92; - for (_iter92 = this->key_value_metadata.begin(); _iter92 != this->key_value_metadata.end(); ++_iter92) - { - xfer += (*_iter92).write(oprot); - } - xfer += oprot->writeListEnd(); - } - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldBegin("data_page_offset", ::duckdb_apache::thrift::protocol::T_I64, 9); - xfer += oprot->writeI64(this->data_page_offset); - xfer += oprot->writeFieldEnd(); - - if (this->__isset.index_page_offset) { - xfer += oprot->writeFieldBegin("index_page_offset", ::duckdb_apache::thrift::protocol::T_I64, 10); - xfer += oprot->writeI64(this->index_page_offset); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.dictionary_page_offset) { - xfer += oprot->writeFieldBegin("dictionary_page_offset", ::duckdb_apache::thrift::protocol::T_I64, 11); - xfer += oprot->writeI64(this->dictionary_page_offset); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.statistics) { - xfer += oprot->writeFieldBegin("statistics", ::duckdb_apache::thrift::protocol::T_STRUCT, 12); - xfer += this->statistics.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.encoding_stats) { - xfer += oprot->writeFieldBegin("encoding_stats", ::duckdb_apache::thrift::protocol::T_LIST, 13); - { - xfer += oprot->writeListBegin(::duckdb_apache::thrift::protocol::T_STRUCT, static_cast(this->encoding_stats.size())); - std::vector ::const_iterator _iter93; - for (_iter93 = this->encoding_stats.begin(); _iter93 != this->encoding_stats.end(); ++_iter93) - { - xfer += (*_iter93).write(oprot); - } - xfer += oprot->writeListEnd(); - } - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - -void swap(ColumnMetaData &a, ColumnMetaData &b) { - using ::std::swap; - swap(a.type, b.type); - swap(a.encodings, b.encodings); - swap(a.path_in_schema, b.path_in_schema); - swap(a.codec, b.codec); - swap(a.num_values, b.num_values); - swap(a.total_uncompressed_size, b.total_uncompressed_size); - swap(a.total_compressed_size, b.total_compressed_size); - swap(a.key_value_metadata, b.key_value_metadata); - swap(a.data_page_offset, b.data_page_offset); - swap(a.index_page_offset, b.index_page_offset); - swap(a.dictionary_page_offset, b.dictionary_page_offset); - swap(a.statistics, b.statistics); - swap(a.encoding_stats, b.encoding_stats); - swap(a.__isset, b.__isset); -} - -ColumnMetaData::ColumnMetaData(const ColumnMetaData& other94) { - type = other94.type; - encodings = other94.encodings; - path_in_schema = other94.path_in_schema; - codec = other94.codec; - num_values = other94.num_values; - total_uncompressed_size = other94.total_uncompressed_size; - total_compressed_size = other94.total_compressed_size; - key_value_metadata = other94.key_value_metadata; - data_page_offset = other94.data_page_offset; - index_page_offset = other94.index_page_offset; - dictionary_page_offset = other94.dictionary_page_offset; - statistics = other94.statistics; - encoding_stats = other94.encoding_stats; - __isset = other94.__isset; -} -ColumnMetaData& ColumnMetaData::operator=(const ColumnMetaData& other95) { - type = other95.type; - encodings = other95.encodings; - path_in_schema = other95.path_in_schema; - codec = other95.codec; - num_values = other95.num_values; - total_uncompressed_size = other95.total_uncompressed_size; - total_compressed_size = other95.total_compressed_size; - key_value_metadata = other95.key_value_metadata; - data_page_offset = other95.data_page_offset; - index_page_offset = other95.index_page_offset; - dictionary_page_offset = other95.dictionary_page_offset; - statistics = other95.statistics; - encoding_stats = other95.encoding_stats; - __isset = other95.__isset; - return *this; -} -void ColumnMetaData::printTo(std::ostream& out) const { - using ::duckdb_apache::thrift::to_string; - out << "ColumnMetaData("; - out << "type=" << to_string(type); - out << ", " << "encodings=" << to_string(encodings); - out << ", " << "path_in_schema=" << to_string(path_in_schema); - out << ", " << "codec=" << to_string(codec); - out << ", " << "num_values=" << to_string(num_values); - out << ", " << "total_uncompressed_size=" << to_string(total_uncompressed_size); - out << ", " << "total_compressed_size=" << to_string(total_compressed_size); - out << ", " << "key_value_metadata="; (__isset.key_value_metadata ? (out << to_string(key_value_metadata)) : (out << "")); - out << ", " << "data_page_offset=" << to_string(data_page_offset); - out << ", " << "index_page_offset="; (__isset.index_page_offset ? (out << to_string(index_page_offset)) : (out << "")); - out << ", " << "dictionary_page_offset="; (__isset.dictionary_page_offset ? (out << to_string(dictionary_page_offset)) : (out << "")); - out << ", " << "statistics="; (__isset.statistics ? (out << to_string(statistics)) : (out << "")); - out << ", " << "encoding_stats="; (__isset.encoding_stats ? (out << to_string(encoding_stats)) : (out << "")); - out << ")"; -} - - -EncryptionWithFooterKey::~EncryptionWithFooterKey() throw() { -} - -std::ostream& operator<<(std::ostream& out, const EncryptionWithFooterKey& obj) -{ - obj.printTo(out); - return out; -} - - -uint32_t EncryptionWithFooterKey::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { - - ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::duckdb_apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::duckdb_apache::thrift::protocol::TProtocolException; - - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { - break; - } - xfer += iprot->skip(ftype); - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t EncryptionWithFooterKey::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("EncryptionWithFooterKey"); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - -void swap(EncryptionWithFooterKey &a, EncryptionWithFooterKey &b) { - using ::std::swap; - (void) a; - (void) b; -} - -EncryptionWithFooterKey::EncryptionWithFooterKey(const EncryptionWithFooterKey& other96) { - (void) other96; -} -EncryptionWithFooterKey& EncryptionWithFooterKey::operator=(const EncryptionWithFooterKey& other97) { - (void) other97; - return *this; -} -void EncryptionWithFooterKey::printTo(std::ostream& out) const { - using ::duckdb_apache::thrift::to_string; - out << "EncryptionWithFooterKey("; - out << ")"; -} - - -EncryptionWithColumnKey::~EncryptionWithColumnKey() throw() { -} - - -void EncryptionWithColumnKey::__set_path_in_schema(const std::vector & val) { - this->path_in_schema = val; -} - -void EncryptionWithColumnKey::__set_key_metadata(const std::string& val) { - this->key_metadata = val; -__isset.key_metadata = true; -} -std::ostream& operator<<(std::ostream& out, const EncryptionWithColumnKey& obj) -{ - obj.printTo(out); - return out; -} - - -uint32_t EncryptionWithColumnKey::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { - - ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::duckdb_apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::duckdb_apache::thrift::protocol::TProtocolException; - - bool isset_path_in_schema = false; - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::duckdb_apache::thrift::protocol::T_LIST) { - { - this->path_in_schema.clear(); - uint32_t _size98; - ::duckdb_apache::thrift::protocol::TType _etype101; - xfer += iprot->readListBegin(_etype101, _size98); - this->path_in_schema.resize(_size98); - uint32_t _i102; - for (_i102 = 0; _i102 < _size98; ++_i102) - { - xfer += iprot->readString(this->path_in_schema[_i102]); - } - xfer += iprot->readListEnd(); - } - isset_path_in_schema = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRING) { - xfer += iprot->readBinary(this->key_metadata); - this->__isset.key_metadata = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - if (!isset_path_in_schema) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; -} - -uint32_t EncryptionWithColumnKey::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("EncryptionWithColumnKey"); - - xfer += oprot->writeFieldBegin("path_in_schema", ::duckdb_apache::thrift::protocol::T_LIST, 1); - { - xfer += oprot->writeListBegin(::duckdb_apache::thrift::protocol::T_STRING, static_cast(this->path_in_schema.size())); - std::vector ::const_iterator _iter103; - for (_iter103 = this->path_in_schema.begin(); _iter103 != this->path_in_schema.end(); ++_iter103) - { - xfer += oprot->writeString((*_iter103)); - } - xfer += oprot->writeListEnd(); - } - xfer += oprot->writeFieldEnd(); - - if (this->__isset.key_metadata) { - xfer += oprot->writeFieldBegin("key_metadata", ::duckdb_apache::thrift::protocol::T_STRING, 2); - xfer += oprot->writeBinary(this->key_metadata); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - -void swap(EncryptionWithColumnKey &a, EncryptionWithColumnKey &b) { - using ::std::swap; - swap(a.path_in_schema, b.path_in_schema); - swap(a.key_metadata, b.key_metadata); - swap(a.__isset, b.__isset); -} - -EncryptionWithColumnKey::EncryptionWithColumnKey(const EncryptionWithColumnKey& other104) { - path_in_schema = other104.path_in_schema; - key_metadata = other104.key_metadata; - __isset = other104.__isset; -} -EncryptionWithColumnKey& EncryptionWithColumnKey::operator=(const EncryptionWithColumnKey& other105) { - path_in_schema = other105.path_in_schema; - key_metadata = other105.key_metadata; - __isset = other105.__isset; - return *this; -} -void EncryptionWithColumnKey::printTo(std::ostream& out) const { - using ::duckdb_apache::thrift::to_string; - out << "EncryptionWithColumnKey("; - out << "path_in_schema=" << to_string(path_in_schema); - out << ", " << "key_metadata="; (__isset.key_metadata ? (out << to_string(key_metadata)) : (out << "")); - out << ")"; -} - - -ColumnCryptoMetaData::~ColumnCryptoMetaData() throw() { -} - - -void ColumnCryptoMetaData::__set_ENCRYPTION_WITH_FOOTER_KEY(const EncryptionWithFooterKey& val) { - this->ENCRYPTION_WITH_FOOTER_KEY = val; -__isset.ENCRYPTION_WITH_FOOTER_KEY = true; -} - -void ColumnCryptoMetaData::__set_ENCRYPTION_WITH_COLUMN_KEY(const EncryptionWithColumnKey& val) { - this->ENCRYPTION_WITH_COLUMN_KEY = val; -__isset.ENCRYPTION_WITH_COLUMN_KEY = true; -} -std::ostream& operator<<(std::ostream& out, const ColumnCryptoMetaData& obj) -{ - obj.printTo(out); - return out; -} - - -uint32_t ColumnCryptoMetaData::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { - - ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::duckdb_apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::duckdb_apache::thrift::protocol::TProtocolException; - - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRUCT) { - xfer += this->ENCRYPTION_WITH_FOOTER_KEY.read(iprot); - this->__isset.ENCRYPTION_WITH_FOOTER_KEY = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRUCT) { - xfer += this->ENCRYPTION_WITH_COLUMN_KEY.read(iprot); - this->__isset.ENCRYPTION_WITH_COLUMN_KEY = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t ColumnCryptoMetaData::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("ColumnCryptoMetaData"); - - if (this->__isset.ENCRYPTION_WITH_FOOTER_KEY) { - xfer += oprot->writeFieldBegin("ENCRYPTION_WITH_FOOTER_KEY", ::duckdb_apache::thrift::protocol::T_STRUCT, 1); - xfer += this->ENCRYPTION_WITH_FOOTER_KEY.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.ENCRYPTION_WITH_COLUMN_KEY) { - xfer += oprot->writeFieldBegin("ENCRYPTION_WITH_COLUMN_KEY", ::duckdb_apache::thrift::protocol::T_STRUCT, 2); - xfer += this->ENCRYPTION_WITH_COLUMN_KEY.write(oprot); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - -void swap(ColumnCryptoMetaData &a, ColumnCryptoMetaData &b) { - using ::std::swap; - swap(a.ENCRYPTION_WITH_FOOTER_KEY, b.ENCRYPTION_WITH_FOOTER_KEY); - swap(a.ENCRYPTION_WITH_COLUMN_KEY, b.ENCRYPTION_WITH_COLUMN_KEY); - swap(a.__isset, b.__isset); -} - -ColumnCryptoMetaData::ColumnCryptoMetaData(const ColumnCryptoMetaData& other106) { - ENCRYPTION_WITH_FOOTER_KEY = other106.ENCRYPTION_WITH_FOOTER_KEY; - ENCRYPTION_WITH_COLUMN_KEY = other106.ENCRYPTION_WITH_COLUMN_KEY; - __isset = other106.__isset; -} -ColumnCryptoMetaData& ColumnCryptoMetaData::operator=(const ColumnCryptoMetaData& other107) { - ENCRYPTION_WITH_FOOTER_KEY = other107.ENCRYPTION_WITH_FOOTER_KEY; - ENCRYPTION_WITH_COLUMN_KEY = other107.ENCRYPTION_WITH_COLUMN_KEY; - __isset = other107.__isset; - return *this; -} -void ColumnCryptoMetaData::printTo(std::ostream& out) const { - using ::duckdb_apache::thrift::to_string; - out << "ColumnCryptoMetaData("; - out << "ENCRYPTION_WITH_FOOTER_KEY="; (__isset.ENCRYPTION_WITH_FOOTER_KEY ? (out << to_string(ENCRYPTION_WITH_FOOTER_KEY)) : (out << "")); - out << ", " << "ENCRYPTION_WITH_COLUMN_KEY="; (__isset.ENCRYPTION_WITH_COLUMN_KEY ? (out << to_string(ENCRYPTION_WITH_COLUMN_KEY)) : (out << "")); - out << ")"; -} - - -ColumnChunk::~ColumnChunk() throw() { -} - - -void ColumnChunk::__set_file_path(const std::string& val) { - this->file_path = val; -__isset.file_path = true; -} - -void ColumnChunk::__set_file_offset(const int64_t val) { - this->file_offset = val; -} - -void ColumnChunk::__set_meta_data(const ColumnMetaData& val) { - this->meta_data = val; -__isset.meta_data = true; -} - -void ColumnChunk::__set_offset_index_offset(const int64_t val) { - this->offset_index_offset = val; -__isset.offset_index_offset = true; -} - -void ColumnChunk::__set_offset_index_length(const int32_t val) { - this->offset_index_length = val; -__isset.offset_index_length = true; -} - -void ColumnChunk::__set_column_index_offset(const int64_t val) { - this->column_index_offset = val; -__isset.column_index_offset = true; -} - -void ColumnChunk::__set_column_index_length(const int32_t val) { - this->column_index_length = val; -__isset.column_index_length = true; -} - -void ColumnChunk::__set_crypto_metadata(const ColumnCryptoMetaData& val) { - this->crypto_metadata = val; -__isset.crypto_metadata = true; -} - -void ColumnChunk::__set_encrypted_column_metadata(const std::string& val) { - this->encrypted_column_metadata = val; -__isset.encrypted_column_metadata = true; -} -std::ostream& operator<<(std::ostream& out, const ColumnChunk& obj) -{ - obj.printTo(out); - return out; -} - - -uint32_t ColumnChunk::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { - - ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::duckdb_apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::duckdb_apache::thrift::protocol::TProtocolException; - - bool isset_file_offset = false; - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRING) { - xfer += iprot->readString(this->file_path); - this->__isset.file_path = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::duckdb_apache::thrift::protocol::T_I64) { - xfer += iprot->readI64(this->file_offset); - isset_file_offset = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 3: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRUCT) { - xfer += this->meta_data.read(iprot); - this->__isset.meta_data = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 4: - if (ftype == ::duckdb_apache::thrift::protocol::T_I64) { - xfer += iprot->readI64(this->offset_index_offset); - this->__isset.offset_index_offset = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 5: - if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->offset_index_length); - this->__isset.offset_index_length = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 6: - if (ftype == ::duckdb_apache::thrift::protocol::T_I64) { - xfer += iprot->readI64(this->column_index_offset); - this->__isset.column_index_offset = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 7: - if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->column_index_length); - this->__isset.column_index_length = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 8: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRUCT) { - xfer += this->crypto_metadata.read(iprot); - this->__isset.crypto_metadata = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 9: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRING) { - xfer += iprot->readBinary(this->encrypted_column_metadata); - this->__isset.encrypted_column_metadata = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - if (!isset_file_offset) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; -} - -uint32_t ColumnChunk::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("ColumnChunk"); - - if (this->__isset.file_path) { - xfer += oprot->writeFieldBegin("file_path", ::duckdb_apache::thrift::protocol::T_STRING, 1); - xfer += oprot->writeString(this->file_path); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldBegin("file_offset", ::duckdb_apache::thrift::protocol::T_I64, 2); - xfer += oprot->writeI64(this->file_offset); - xfer += oprot->writeFieldEnd(); - - if (this->__isset.meta_data) { - xfer += oprot->writeFieldBegin("meta_data", ::duckdb_apache::thrift::protocol::T_STRUCT, 3); - xfer += this->meta_data.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.offset_index_offset) { - xfer += oprot->writeFieldBegin("offset_index_offset", ::duckdb_apache::thrift::protocol::T_I64, 4); - xfer += oprot->writeI64(this->offset_index_offset); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.offset_index_length) { - xfer += oprot->writeFieldBegin("offset_index_length", ::duckdb_apache::thrift::protocol::T_I32, 5); - xfer += oprot->writeI32(this->offset_index_length); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.column_index_offset) { - xfer += oprot->writeFieldBegin("column_index_offset", ::duckdb_apache::thrift::protocol::T_I64, 6); - xfer += oprot->writeI64(this->column_index_offset); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.column_index_length) { - xfer += oprot->writeFieldBegin("column_index_length", ::duckdb_apache::thrift::protocol::T_I32, 7); - xfer += oprot->writeI32(this->column_index_length); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.crypto_metadata) { - xfer += oprot->writeFieldBegin("crypto_metadata", ::duckdb_apache::thrift::protocol::T_STRUCT, 8); - xfer += this->crypto_metadata.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.encrypted_column_metadata) { - xfer += oprot->writeFieldBegin("encrypted_column_metadata", ::duckdb_apache::thrift::protocol::T_STRING, 9); - xfer += oprot->writeBinary(this->encrypted_column_metadata); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - -void swap(ColumnChunk &a, ColumnChunk &b) { - using ::std::swap; - swap(a.file_path, b.file_path); - swap(a.file_offset, b.file_offset); - swap(a.meta_data, b.meta_data); - swap(a.offset_index_offset, b.offset_index_offset); - swap(a.offset_index_length, b.offset_index_length); - swap(a.column_index_offset, b.column_index_offset); - swap(a.column_index_length, b.column_index_length); - swap(a.crypto_metadata, b.crypto_metadata); - swap(a.encrypted_column_metadata, b.encrypted_column_metadata); - swap(a.__isset, b.__isset); -} - -ColumnChunk::ColumnChunk(const ColumnChunk& other108) { - file_path = other108.file_path; - file_offset = other108.file_offset; - meta_data = other108.meta_data; - offset_index_offset = other108.offset_index_offset; - offset_index_length = other108.offset_index_length; - column_index_offset = other108.column_index_offset; - column_index_length = other108.column_index_length; - crypto_metadata = other108.crypto_metadata; - encrypted_column_metadata = other108.encrypted_column_metadata; - __isset = other108.__isset; -} -ColumnChunk& ColumnChunk::operator=(const ColumnChunk& other109) { - file_path = other109.file_path; - file_offset = other109.file_offset; - meta_data = other109.meta_data; - offset_index_offset = other109.offset_index_offset; - offset_index_length = other109.offset_index_length; - column_index_offset = other109.column_index_offset; - column_index_length = other109.column_index_length; - crypto_metadata = other109.crypto_metadata; - encrypted_column_metadata = other109.encrypted_column_metadata; - __isset = other109.__isset; - return *this; -} -void ColumnChunk::printTo(std::ostream& out) const { - using ::duckdb_apache::thrift::to_string; - out << "ColumnChunk("; - out << "file_path="; (__isset.file_path ? (out << to_string(file_path)) : (out << "")); - out << ", " << "file_offset=" << to_string(file_offset); - out << ", " << "meta_data="; (__isset.meta_data ? (out << to_string(meta_data)) : (out << "")); - out << ", " << "offset_index_offset="; (__isset.offset_index_offset ? (out << to_string(offset_index_offset)) : (out << "")); - out << ", " << "offset_index_length="; (__isset.offset_index_length ? (out << to_string(offset_index_length)) : (out << "")); - out << ", " << "column_index_offset="; (__isset.column_index_offset ? (out << to_string(column_index_offset)) : (out << "")); - out << ", " << "column_index_length="; (__isset.column_index_length ? (out << to_string(column_index_length)) : (out << "")); - out << ", " << "crypto_metadata="; (__isset.crypto_metadata ? (out << to_string(crypto_metadata)) : (out << "")); - out << ", " << "encrypted_column_metadata="; (__isset.encrypted_column_metadata ? (out << to_string(encrypted_column_metadata)) : (out << "")); - out << ")"; -} - - -RowGroup::~RowGroup() throw() { -} - - -void RowGroup::__set_columns(const std::vector & val) { - this->columns = val; -} - -void RowGroup::__set_total_byte_size(const int64_t val) { - this->total_byte_size = val; -} - -void RowGroup::__set_num_rows(const int64_t val) { - this->num_rows = val; -} - -void RowGroup::__set_sorting_columns(const std::vector & val) { - this->sorting_columns = val; -__isset.sorting_columns = true; -} - -void RowGroup::__set_file_offset(const int64_t val) { - this->file_offset = val; -__isset.file_offset = true; -} - -void RowGroup::__set_total_compressed_size(const int64_t val) { - this->total_compressed_size = val; -__isset.total_compressed_size = true; -} - -void RowGroup::__set_ordinal(const int16_t val) { - this->ordinal = val; -__isset.ordinal = true; -} -std::ostream& operator<<(std::ostream& out, const RowGroup& obj) -{ - obj.printTo(out); - return out; -} - - -uint32_t RowGroup::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { - - ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::duckdb_apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::duckdb_apache::thrift::protocol::TProtocolException; - - bool isset_columns = false; - bool isset_total_byte_size = false; - bool isset_num_rows = false; - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::duckdb_apache::thrift::protocol::T_LIST) { - { - this->columns.clear(); - uint32_t _size110; - ::duckdb_apache::thrift::protocol::TType _etype113; - xfer += iprot->readListBegin(_etype113, _size110); - this->columns.resize(_size110); - uint32_t _i114; - for (_i114 = 0; _i114 < _size110; ++_i114) - { - xfer += this->columns[_i114].read(iprot); - } - xfer += iprot->readListEnd(); - } - isset_columns = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::duckdb_apache::thrift::protocol::T_I64) { - xfer += iprot->readI64(this->total_byte_size); - isset_total_byte_size = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 3: - if (ftype == ::duckdb_apache::thrift::protocol::T_I64) { - xfer += iprot->readI64(this->num_rows); - isset_num_rows = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 4: - if (ftype == ::duckdb_apache::thrift::protocol::T_LIST) { - { - this->sorting_columns.clear(); - uint32_t _size115; - ::duckdb_apache::thrift::protocol::TType _etype118; - xfer += iprot->readListBegin(_etype118, _size115); - this->sorting_columns.resize(_size115); - uint32_t _i119; - for (_i119 = 0; _i119 < _size115; ++_i119) - { - xfer += this->sorting_columns[_i119].read(iprot); - } - xfer += iprot->readListEnd(); - } - this->__isset.sorting_columns = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 5: - if (ftype == ::duckdb_apache::thrift::protocol::T_I64) { - xfer += iprot->readI64(this->file_offset); - this->__isset.file_offset = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 6: - if (ftype == ::duckdb_apache::thrift::protocol::T_I64) { - xfer += iprot->readI64(this->total_compressed_size); - this->__isset.total_compressed_size = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 7: - if (ftype == ::duckdb_apache::thrift::protocol::T_I16) { - xfer += iprot->readI16(this->ordinal); - this->__isset.ordinal = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - if (!isset_columns) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_total_byte_size) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_num_rows) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; -} - -uint32_t RowGroup::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("RowGroup"); - - xfer += oprot->writeFieldBegin("columns", ::duckdb_apache::thrift::protocol::T_LIST, 1); - { - xfer += oprot->writeListBegin(::duckdb_apache::thrift::protocol::T_STRUCT, static_cast(this->columns.size())); - std::vector ::const_iterator _iter120; - for (_iter120 = this->columns.begin(); _iter120 != this->columns.end(); ++_iter120) - { - xfer += (*_iter120).write(oprot); - } - xfer += oprot->writeListEnd(); - } - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("total_byte_size", ::duckdb_apache::thrift::protocol::T_I64, 2); - xfer += oprot->writeI64(this->total_byte_size); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("num_rows", ::duckdb_apache::thrift::protocol::T_I64, 3); - xfer += oprot->writeI64(this->num_rows); - xfer += oprot->writeFieldEnd(); - - if (this->__isset.sorting_columns) { - xfer += oprot->writeFieldBegin("sorting_columns", ::duckdb_apache::thrift::protocol::T_LIST, 4); - { - xfer += oprot->writeListBegin(::duckdb_apache::thrift::protocol::T_STRUCT, static_cast(this->sorting_columns.size())); - std::vector ::const_iterator _iter121; - for (_iter121 = this->sorting_columns.begin(); _iter121 != this->sorting_columns.end(); ++_iter121) - { - xfer += (*_iter121).write(oprot); - } - xfer += oprot->writeListEnd(); - } - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.file_offset) { - xfer += oprot->writeFieldBegin("file_offset", ::duckdb_apache::thrift::protocol::T_I64, 5); - xfer += oprot->writeI64(this->file_offset); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.total_compressed_size) { - xfer += oprot->writeFieldBegin("total_compressed_size", ::duckdb_apache::thrift::protocol::T_I64, 6); - xfer += oprot->writeI64(this->total_compressed_size); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.ordinal) { - xfer += oprot->writeFieldBegin("ordinal", ::duckdb_apache::thrift::protocol::T_I16, 7); - xfer += oprot->writeI16(this->ordinal); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - -void swap(RowGroup &a, RowGroup &b) { - using ::std::swap; - swap(a.columns, b.columns); - swap(a.total_byte_size, b.total_byte_size); - swap(a.num_rows, b.num_rows); - swap(a.sorting_columns, b.sorting_columns); - swap(a.file_offset, b.file_offset); - swap(a.total_compressed_size, b.total_compressed_size); - swap(a.ordinal, b.ordinal); - swap(a.__isset, b.__isset); -} - -RowGroup::RowGroup(const RowGroup& other122) { - columns = other122.columns; - total_byte_size = other122.total_byte_size; - num_rows = other122.num_rows; - sorting_columns = other122.sorting_columns; - file_offset = other122.file_offset; - total_compressed_size = other122.total_compressed_size; - ordinal = other122.ordinal; - __isset = other122.__isset; -} -RowGroup& RowGroup::operator=(const RowGroup& other123) { - columns = other123.columns; - total_byte_size = other123.total_byte_size; - num_rows = other123.num_rows; - sorting_columns = other123.sorting_columns; - file_offset = other123.file_offset; - total_compressed_size = other123.total_compressed_size; - ordinal = other123.ordinal; - __isset = other123.__isset; - return *this; -} -void RowGroup::printTo(std::ostream& out) const { - using ::duckdb_apache::thrift::to_string; - out << "RowGroup("; - out << "columns=" << to_string(columns); - out << ", " << "total_byte_size=" << to_string(total_byte_size); - out << ", " << "num_rows=" << to_string(num_rows); - out << ", " << "sorting_columns="; (__isset.sorting_columns ? (out << to_string(sorting_columns)) : (out << "")); - out << ", " << "file_offset="; (__isset.file_offset ? (out << to_string(file_offset)) : (out << "")); - out << ", " << "total_compressed_size="; (__isset.total_compressed_size ? (out << to_string(total_compressed_size)) : (out << "")); - out << ", " << "ordinal="; (__isset.ordinal ? (out << to_string(ordinal)) : (out << "")); - out << ")"; -} - - -TypeDefinedOrder::~TypeDefinedOrder() throw() { -} - -std::ostream& operator<<(std::ostream& out, const TypeDefinedOrder& obj) -{ - obj.printTo(out); - return out; -} - - -uint32_t TypeDefinedOrder::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { - - ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::duckdb_apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::duckdb_apache::thrift::protocol::TProtocolException; - - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { - break; - } - xfer += iprot->skip(ftype); - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t TypeDefinedOrder::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("TypeDefinedOrder"); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - -void swap(TypeDefinedOrder &a, TypeDefinedOrder &b) { - using ::std::swap; - (void) a; - (void) b; -} - -TypeDefinedOrder::TypeDefinedOrder(const TypeDefinedOrder& other124) { - (void) other124; -} -TypeDefinedOrder& TypeDefinedOrder::operator=(const TypeDefinedOrder& other125) { - (void) other125; - return *this; -} -void TypeDefinedOrder::printTo(std::ostream& out) const { - using ::duckdb_apache::thrift::to_string; - out << "TypeDefinedOrder("; - out << ")"; -} - - -ColumnOrder::~ColumnOrder() throw() { -} - - -void ColumnOrder::__set_TYPE_ORDER(const TypeDefinedOrder& val) { - this->TYPE_ORDER = val; -__isset.TYPE_ORDER = true; -} -std::ostream& operator<<(std::ostream& out, const ColumnOrder& obj) -{ - obj.printTo(out); - return out; -} - - -uint32_t ColumnOrder::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { - - ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::duckdb_apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::duckdb_apache::thrift::protocol::TProtocolException; - - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRUCT) { - xfer += this->TYPE_ORDER.read(iprot); - this->__isset.TYPE_ORDER = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t ColumnOrder::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("ColumnOrder"); - - if (this->__isset.TYPE_ORDER) { - xfer += oprot->writeFieldBegin("TYPE_ORDER", ::duckdb_apache::thrift::protocol::T_STRUCT, 1); - xfer += this->TYPE_ORDER.write(oprot); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - -void swap(ColumnOrder &a, ColumnOrder &b) { - using ::std::swap; - swap(a.TYPE_ORDER, b.TYPE_ORDER); - swap(a.__isset, b.__isset); -} - -ColumnOrder::ColumnOrder(const ColumnOrder& other126) { - TYPE_ORDER = other126.TYPE_ORDER; - __isset = other126.__isset; -} -ColumnOrder& ColumnOrder::operator=(const ColumnOrder& other127) { - TYPE_ORDER = other127.TYPE_ORDER; - __isset = other127.__isset; - return *this; -} -void ColumnOrder::printTo(std::ostream& out) const { - using ::duckdb_apache::thrift::to_string; - out << "ColumnOrder("; - out << "TYPE_ORDER="; (__isset.TYPE_ORDER ? (out << to_string(TYPE_ORDER)) : (out << "")); - out << ")"; -} - - -PageLocation::~PageLocation() throw() { -} - - -void PageLocation::__set_offset(const int64_t val) { - this->offset = val; -} - -void PageLocation::__set_compressed_page_size(const int32_t val) { - this->compressed_page_size = val; -} - -void PageLocation::__set_first_row_index(const int64_t val) { - this->first_row_index = val; -} -std::ostream& operator<<(std::ostream& out, const PageLocation& obj) -{ - obj.printTo(out); - return out; -} - - -uint32_t PageLocation::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { - - ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::duckdb_apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::duckdb_apache::thrift::protocol::TProtocolException; - - bool isset_offset = false; - bool isset_compressed_page_size = false; - bool isset_first_row_index = false; - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::duckdb_apache::thrift::protocol::T_I64) { - xfer += iprot->readI64(this->offset); - isset_offset = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->compressed_page_size); - isset_compressed_page_size = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 3: - if (ftype == ::duckdb_apache::thrift::protocol::T_I64) { - xfer += iprot->readI64(this->first_row_index); - isset_first_row_index = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - if (!isset_offset) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_compressed_page_size) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_first_row_index) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; -} - -uint32_t PageLocation::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("PageLocation"); - - xfer += oprot->writeFieldBegin("offset", ::duckdb_apache::thrift::protocol::T_I64, 1); - xfer += oprot->writeI64(this->offset); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("compressed_page_size", ::duckdb_apache::thrift::protocol::T_I32, 2); - xfer += oprot->writeI32(this->compressed_page_size); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("first_row_index", ::duckdb_apache::thrift::protocol::T_I64, 3); - xfer += oprot->writeI64(this->first_row_index); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - -void swap(PageLocation &a, PageLocation &b) { - using ::std::swap; - swap(a.offset, b.offset); - swap(a.compressed_page_size, b.compressed_page_size); - swap(a.first_row_index, b.first_row_index); -} - -PageLocation::PageLocation(const PageLocation& other128) { - offset = other128.offset; - compressed_page_size = other128.compressed_page_size; - first_row_index = other128.first_row_index; -} -PageLocation& PageLocation::operator=(const PageLocation& other129) { - offset = other129.offset; - compressed_page_size = other129.compressed_page_size; - first_row_index = other129.first_row_index; - return *this; -} -void PageLocation::printTo(std::ostream& out) const { - using ::duckdb_apache::thrift::to_string; - out << "PageLocation("; - out << "offset=" << to_string(offset); - out << ", " << "compressed_page_size=" << to_string(compressed_page_size); - out << ", " << "first_row_index=" << to_string(first_row_index); - out << ")"; -} - - -OffsetIndex::~OffsetIndex() throw() { -} - - -void OffsetIndex::__set_page_locations(const std::vector & val) { - this->page_locations = val; -} -std::ostream& operator<<(std::ostream& out, const OffsetIndex& obj) -{ - obj.printTo(out); - return out; -} - - -uint32_t OffsetIndex::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { - - ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::duckdb_apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::duckdb_apache::thrift::protocol::TProtocolException; - - bool isset_page_locations = false; - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::duckdb_apache::thrift::protocol::T_LIST) { - { - this->page_locations.clear(); - uint32_t _size130; - ::duckdb_apache::thrift::protocol::TType _etype133; - xfer += iprot->readListBegin(_etype133, _size130); - this->page_locations.resize(_size130); - uint32_t _i134; - for (_i134 = 0; _i134 < _size130; ++_i134) - { - xfer += this->page_locations[_i134].read(iprot); - } - xfer += iprot->readListEnd(); - } - isset_page_locations = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - if (!isset_page_locations) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; -} - -uint32_t OffsetIndex::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("OffsetIndex"); - - xfer += oprot->writeFieldBegin("page_locations", ::duckdb_apache::thrift::protocol::T_LIST, 1); - { - xfer += oprot->writeListBegin(::duckdb_apache::thrift::protocol::T_STRUCT, static_cast(this->page_locations.size())); - std::vector ::const_iterator _iter135; - for (_iter135 = this->page_locations.begin(); _iter135 != this->page_locations.end(); ++_iter135) - { - xfer += (*_iter135).write(oprot); - } - xfer += oprot->writeListEnd(); - } - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - -void swap(OffsetIndex &a, OffsetIndex &b) { - using ::std::swap; - swap(a.page_locations, b.page_locations); -} - -OffsetIndex::OffsetIndex(const OffsetIndex& other136) { - page_locations = other136.page_locations; -} -OffsetIndex& OffsetIndex::operator=(const OffsetIndex& other137) { - page_locations = other137.page_locations; - return *this; -} -void OffsetIndex::printTo(std::ostream& out) const { - using ::duckdb_apache::thrift::to_string; - out << "OffsetIndex("; - out << "page_locations=" << to_string(page_locations); - out << ")"; -} - - -ColumnIndex::~ColumnIndex() throw() { -} - - -void ColumnIndex::__set_null_pages(const std::vector & val) { - this->null_pages = val; -} - -void ColumnIndex::__set_min_values(const std::vector & val) { - this->min_values = val; -} - -void ColumnIndex::__set_max_values(const std::vector & val) { - this->max_values = val; -} - -void ColumnIndex::__set_boundary_order(const BoundaryOrder::type val) { - this->boundary_order = val; -} - -void ColumnIndex::__set_null_counts(const std::vector & val) { - this->null_counts = val; -__isset.null_counts = true; -} -std::ostream& operator<<(std::ostream& out, const ColumnIndex& obj) -{ - obj.printTo(out); - return out; -} - - -uint32_t ColumnIndex::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { - - ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::duckdb_apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::duckdb_apache::thrift::protocol::TProtocolException; - - bool isset_null_pages = false; - bool isset_min_values = false; - bool isset_max_values = false; - bool isset_boundary_order = false; - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::duckdb_apache::thrift::protocol::T_LIST) { - { - this->null_pages.clear(); - uint32_t _size138; - ::duckdb_apache::thrift::protocol::TType _etype141; - xfer += iprot->readListBegin(_etype141, _size138); - this->null_pages.resize(_size138); - uint32_t _i142; - for (_i142 = 0; _i142 < _size138; ++_i142) - { - xfer += iprot->readBool(this->null_pages[_i142]); - } - xfer += iprot->readListEnd(); - } - isset_null_pages = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::duckdb_apache::thrift::protocol::T_LIST) { - { - this->min_values.clear(); - uint32_t _size143; - ::duckdb_apache::thrift::protocol::TType _etype146; - xfer += iprot->readListBegin(_etype146, _size143); - this->min_values.resize(_size143); - uint32_t _i147; - for (_i147 = 0; _i147 < _size143; ++_i147) - { - xfer += iprot->readBinary(this->min_values[_i147]); - } - xfer += iprot->readListEnd(); - } - isset_min_values = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 3: - if (ftype == ::duckdb_apache::thrift::protocol::T_LIST) { - { - this->max_values.clear(); - uint32_t _size148; - ::duckdb_apache::thrift::protocol::TType _etype151; - xfer += iprot->readListBegin(_etype151, _size148); - this->max_values.resize(_size148); - uint32_t _i152; - for (_i152 = 0; _i152 < _size148; ++_i152) - { - xfer += iprot->readBinary(this->max_values[_i152]); - } - xfer += iprot->readListEnd(); - } - isset_max_values = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 4: - if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { - int32_t ecast153; - xfer += iprot->readI32(ecast153); - this->boundary_order = (BoundaryOrder::type)ecast153; - isset_boundary_order = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 5: - if (ftype == ::duckdb_apache::thrift::protocol::T_LIST) { - { - this->null_counts.clear(); - uint32_t _size154; - ::duckdb_apache::thrift::protocol::TType _etype157; - xfer += iprot->readListBegin(_etype157, _size154); - this->null_counts.resize(_size154); - uint32_t _i158; - for (_i158 = 0; _i158 < _size154; ++_i158) - { - xfer += iprot->readI64(this->null_counts[_i158]); - } - xfer += iprot->readListEnd(); - } - this->__isset.null_counts = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - if (!isset_null_pages) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_min_values) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_max_values) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_boundary_order) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; -} - -uint32_t ColumnIndex::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("ColumnIndex"); - - xfer += oprot->writeFieldBegin("null_pages", ::duckdb_apache::thrift::protocol::T_LIST, 1); - { - xfer += oprot->writeListBegin(::duckdb_apache::thrift::protocol::T_BOOL, static_cast(this->null_pages.size())); - std::vector ::const_iterator _iter159; - for (_iter159 = this->null_pages.begin(); _iter159 != this->null_pages.end(); ++_iter159) - { - xfer += oprot->writeBool((*_iter159)); - } - xfer += oprot->writeListEnd(); - } - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("min_values", ::duckdb_apache::thrift::protocol::T_LIST, 2); - { - xfer += oprot->writeListBegin(::duckdb_apache::thrift::protocol::T_STRING, static_cast(this->min_values.size())); - std::vector ::const_iterator _iter160; - for (_iter160 = this->min_values.begin(); _iter160 != this->min_values.end(); ++_iter160) - { - xfer += oprot->writeBinary((*_iter160)); - } - xfer += oprot->writeListEnd(); - } - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("max_values", ::duckdb_apache::thrift::protocol::T_LIST, 3); - { - xfer += oprot->writeListBegin(::duckdb_apache::thrift::protocol::T_STRING, static_cast(this->max_values.size())); - std::vector ::const_iterator _iter161; - for (_iter161 = this->max_values.begin(); _iter161 != this->max_values.end(); ++_iter161) - { - xfer += oprot->writeBinary((*_iter161)); - } - xfer += oprot->writeListEnd(); - } - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("boundary_order", ::duckdb_apache::thrift::protocol::T_I32, 4); - xfer += oprot->writeI32((int32_t)this->boundary_order); - xfer += oprot->writeFieldEnd(); - - if (this->__isset.null_counts) { - xfer += oprot->writeFieldBegin("null_counts", ::duckdb_apache::thrift::protocol::T_LIST, 5); - { - xfer += oprot->writeListBegin(::duckdb_apache::thrift::protocol::T_I64, static_cast(this->null_counts.size())); - std::vector ::const_iterator _iter162; - for (_iter162 = this->null_counts.begin(); _iter162 != this->null_counts.end(); ++_iter162) - { - xfer += oprot->writeI64((*_iter162)); - } - xfer += oprot->writeListEnd(); - } - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - -void swap(ColumnIndex &a, ColumnIndex &b) { - using ::std::swap; - swap(a.null_pages, b.null_pages); - swap(a.min_values, b.min_values); - swap(a.max_values, b.max_values); - swap(a.boundary_order, b.boundary_order); - swap(a.null_counts, b.null_counts); - swap(a.__isset, b.__isset); -} - -ColumnIndex::ColumnIndex(const ColumnIndex& other163) { - null_pages = other163.null_pages; - min_values = other163.min_values; - max_values = other163.max_values; - boundary_order = other163.boundary_order; - null_counts = other163.null_counts; - __isset = other163.__isset; -} -ColumnIndex& ColumnIndex::operator=(const ColumnIndex& other164) { - null_pages = other164.null_pages; - min_values = other164.min_values; - max_values = other164.max_values; - boundary_order = other164.boundary_order; - null_counts = other164.null_counts; - __isset = other164.__isset; - return *this; -} -void ColumnIndex::printTo(std::ostream& out) const { - using ::duckdb_apache::thrift::to_string; - out << "ColumnIndex("; - out << "null_pages=" << to_string(null_pages); - out << ", " << "min_values=" << to_string(min_values); - out << ", " << "max_values=" << to_string(max_values); - out << ", " << "boundary_order=" << to_string(boundary_order); - out << ", " << "null_counts="; (__isset.null_counts ? (out << to_string(null_counts)) : (out << "")); - out << ")"; -} - - -AesGcmV1::~AesGcmV1() throw() { -} - - -void AesGcmV1::__set_aad_prefix(const std::string& val) { - this->aad_prefix = val; -__isset.aad_prefix = true; -} - -void AesGcmV1::__set_aad_file_unique(const std::string& val) { - this->aad_file_unique = val; -__isset.aad_file_unique = true; -} - -void AesGcmV1::__set_supply_aad_prefix(const bool val) { - this->supply_aad_prefix = val; -__isset.supply_aad_prefix = true; -} -std::ostream& operator<<(std::ostream& out, const AesGcmV1& obj) -{ - obj.printTo(out); - return out; -} - - -uint32_t AesGcmV1::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { - - ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::duckdb_apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::duckdb_apache::thrift::protocol::TProtocolException; - - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRING) { - xfer += iprot->readBinary(this->aad_prefix); - this->__isset.aad_prefix = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRING) { - xfer += iprot->readBinary(this->aad_file_unique); - this->__isset.aad_file_unique = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 3: - if (ftype == ::duckdb_apache::thrift::protocol::T_BOOL) { - xfer += iprot->readBool(this->supply_aad_prefix); - this->__isset.supply_aad_prefix = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t AesGcmV1::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("AesGcmV1"); - - if (this->__isset.aad_prefix) { - xfer += oprot->writeFieldBegin("aad_prefix", ::duckdb_apache::thrift::protocol::T_STRING, 1); - xfer += oprot->writeBinary(this->aad_prefix); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.aad_file_unique) { - xfer += oprot->writeFieldBegin("aad_file_unique", ::duckdb_apache::thrift::protocol::T_STRING, 2); - xfer += oprot->writeBinary(this->aad_file_unique); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.supply_aad_prefix) { - xfer += oprot->writeFieldBegin("supply_aad_prefix", ::duckdb_apache::thrift::protocol::T_BOOL, 3); - xfer += oprot->writeBool(this->supply_aad_prefix); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - -void swap(AesGcmV1 &a, AesGcmV1 &b) { - using ::std::swap; - swap(a.aad_prefix, b.aad_prefix); - swap(a.aad_file_unique, b.aad_file_unique); - swap(a.supply_aad_prefix, b.supply_aad_prefix); - swap(a.__isset, b.__isset); -} - -AesGcmV1::AesGcmV1(const AesGcmV1& other165) { - aad_prefix = other165.aad_prefix; - aad_file_unique = other165.aad_file_unique; - supply_aad_prefix = other165.supply_aad_prefix; - __isset = other165.__isset; -} -AesGcmV1& AesGcmV1::operator=(const AesGcmV1& other166) { - aad_prefix = other166.aad_prefix; - aad_file_unique = other166.aad_file_unique; - supply_aad_prefix = other166.supply_aad_prefix; - __isset = other166.__isset; - return *this; -} -void AesGcmV1::printTo(std::ostream& out) const { - using ::duckdb_apache::thrift::to_string; - out << "AesGcmV1("; - out << "aad_prefix="; (__isset.aad_prefix ? (out << to_string(aad_prefix)) : (out << "")); - out << ", " << "aad_file_unique="; (__isset.aad_file_unique ? (out << to_string(aad_file_unique)) : (out << "")); - out << ", " << "supply_aad_prefix="; (__isset.supply_aad_prefix ? (out << to_string(supply_aad_prefix)) : (out << "")); - out << ")"; -} - - -AesGcmCtrV1::~AesGcmCtrV1() throw() { -} - - -void AesGcmCtrV1::__set_aad_prefix(const std::string& val) { - this->aad_prefix = val; -__isset.aad_prefix = true; -} - -void AesGcmCtrV1::__set_aad_file_unique(const std::string& val) { - this->aad_file_unique = val; -__isset.aad_file_unique = true; -} - -void AesGcmCtrV1::__set_supply_aad_prefix(const bool val) { - this->supply_aad_prefix = val; -__isset.supply_aad_prefix = true; -} -std::ostream& operator<<(std::ostream& out, const AesGcmCtrV1& obj) -{ - obj.printTo(out); - return out; -} - - -uint32_t AesGcmCtrV1::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { - - ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::duckdb_apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::duckdb_apache::thrift::protocol::TProtocolException; - - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRING) { - xfer += iprot->readBinary(this->aad_prefix); - this->__isset.aad_prefix = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRING) { - xfer += iprot->readBinary(this->aad_file_unique); - this->__isset.aad_file_unique = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 3: - if (ftype == ::duckdb_apache::thrift::protocol::T_BOOL) { - xfer += iprot->readBool(this->supply_aad_prefix); - this->__isset.supply_aad_prefix = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t AesGcmCtrV1::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("AesGcmCtrV1"); - - if (this->__isset.aad_prefix) { - xfer += oprot->writeFieldBegin("aad_prefix", ::duckdb_apache::thrift::protocol::T_STRING, 1); - xfer += oprot->writeBinary(this->aad_prefix); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.aad_file_unique) { - xfer += oprot->writeFieldBegin("aad_file_unique", ::duckdb_apache::thrift::protocol::T_STRING, 2); - xfer += oprot->writeBinary(this->aad_file_unique); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.supply_aad_prefix) { - xfer += oprot->writeFieldBegin("supply_aad_prefix", ::duckdb_apache::thrift::protocol::T_BOOL, 3); - xfer += oprot->writeBool(this->supply_aad_prefix); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - -void swap(AesGcmCtrV1 &a, AesGcmCtrV1 &b) { - using ::std::swap; - swap(a.aad_prefix, b.aad_prefix); - swap(a.aad_file_unique, b.aad_file_unique); - swap(a.supply_aad_prefix, b.supply_aad_prefix); - swap(a.__isset, b.__isset); -} - -AesGcmCtrV1::AesGcmCtrV1(const AesGcmCtrV1& other167) { - aad_prefix = other167.aad_prefix; - aad_file_unique = other167.aad_file_unique; - supply_aad_prefix = other167.supply_aad_prefix; - __isset = other167.__isset; -} -AesGcmCtrV1& AesGcmCtrV1::operator=(const AesGcmCtrV1& other168) { - aad_prefix = other168.aad_prefix; - aad_file_unique = other168.aad_file_unique; - supply_aad_prefix = other168.supply_aad_prefix; - __isset = other168.__isset; - return *this; -} -void AesGcmCtrV1::printTo(std::ostream& out) const { - using ::duckdb_apache::thrift::to_string; - out << "AesGcmCtrV1("; - out << "aad_prefix="; (__isset.aad_prefix ? (out << to_string(aad_prefix)) : (out << "")); - out << ", " << "aad_file_unique="; (__isset.aad_file_unique ? (out << to_string(aad_file_unique)) : (out << "")); - out << ", " << "supply_aad_prefix="; (__isset.supply_aad_prefix ? (out << to_string(supply_aad_prefix)) : (out << "")); - out << ")"; -} - - -EncryptionAlgorithm::~EncryptionAlgorithm() throw() { -} - - -void EncryptionAlgorithm::__set_AES_GCM_V1(const AesGcmV1& val) { - this->AES_GCM_V1 = val; -__isset.AES_GCM_V1 = true; -} - -void EncryptionAlgorithm::__set_AES_GCM_CTR_V1(const AesGcmCtrV1& val) { - this->AES_GCM_CTR_V1 = val; -__isset.AES_GCM_CTR_V1 = true; -} -std::ostream& operator<<(std::ostream& out, const EncryptionAlgorithm& obj) -{ - obj.printTo(out); - return out; -} - - -uint32_t EncryptionAlgorithm::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { - - ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::duckdb_apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::duckdb_apache::thrift::protocol::TProtocolException; - - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRUCT) { - xfer += this->AES_GCM_V1.read(iprot); - this->__isset.AES_GCM_V1 = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRUCT) { - xfer += this->AES_GCM_CTR_V1.read(iprot); - this->__isset.AES_GCM_CTR_V1 = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t EncryptionAlgorithm::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("EncryptionAlgorithm"); - - if (this->__isset.AES_GCM_V1) { - xfer += oprot->writeFieldBegin("AES_GCM_V1", ::duckdb_apache::thrift::protocol::T_STRUCT, 1); - xfer += this->AES_GCM_V1.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.AES_GCM_CTR_V1) { - xfer += oprot->writeFieldBegin("AES_GCM_CTR_V1", ::duckdb_apache::thrift::protocol::T_STRUCT, 2); - xfer += this->AES_GCM_CTR_V1.write(oprot); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - -void swap(EncryptionAlgorithm &a, EncryptionAlgorithm &b) { - using ::std::swap; - swap(a.AES_GCM_V1, b.AES_GCM_V1); - swap(a.AES_GCM_CTR_V1, b.AES_GCM_CTR_V1); - swap(a.__isset, b.__isset); -} - -EncryptionAlgorithm::EncryptionAlgorithm(const EncryptionAlgorithm& other169) { - AES_GCM_V1 = other169.AES_GCM_V1; - AES_GCM_CTR_V1 = other169.AES_GCM_CTR_V1; - __isset = other169.__isset; -} -EncryptionAlgorithm& EncryptionAlgorithm::operator=(const EncryptionAlgorithm& other170) { - AES_GCM_V1 = other170.AES_GCM_V1; - AES_GCM_CTR_V1 = other170.AES_GCM_CTR_V1; - __isset = other170.__isset; - return *this; -} -void EncryptionAlgorithm::printTo(std::ostream& out) const { - using ::duckdb_apache::thrift::to_string; - out << "EncryptionAlgorithm("; - out << "AES_GCM_V1="; (__isset.AES_GCM_V1 ? (out << to_string(AES_GCM_V1)) : (out << "")); - out << ", " << "AES_GCM_CTR_V1="; (__isset.AES_GCM_CTR_V1 ? (out << to_string(AES_GCM_CTR_V1)) : (out << "")); - out << ")"; -} - - -FileMetaData::~FileMetaData() throw() { -} - - -void FileMetaData::__set_version(const int32_t val) { - this->version = val; -} - -void FileMetaData::__set_schema(const std::vector & val) { - this->schema = val; -} - -void FileMetaData::__set_num_rows(const int64_t val) { - this->num_rows = val; -} - -void FileMetaData::__set_row_groups(const std::vector & val) { - this->row_groups = val; -} - -void FileMetaData::__set_key_value_metadata(const std::vector & val) { - this->key_value_metadata = val; -__isset.key_value_metadata = true; -} - -void FileMetaData::__set_created_by(const std::string& val) { - this->created_by = val; -__isset.created_by = true; -} - -void FileMetaData::__set_column_orders(const std::vector & val) { - this->column_orders = val; -__isset.column_orders = true; -} - -void FileMetaData::__set_encryption_algorithm(const EncryptionAlgorithm& val) { - this->encryption_algorithm = val; -__isset.encryption_algorithm = true; -} - -void FileMetaData::__set_footer_signing_key_metadata(const std::string& val) { - this->footer_signing_key_metadata = val; -__isset.footer_signing_key_metadata = true; -} -std::ostream& operator<<(std::ostream& out, const FileMetaData& obj) -{ - obj.printTo(out); - return out; -} - - -uint32_t FileMetaData::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { - - ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::duckdb_apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::duckdb_apache::thrift::protocol::TProtocolException; - - bool isset_version = false; - bool isset_schema = false; - bool isset_num_rows = false; - bool isset_row_groups = false; - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->version); - isset_version = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::duckdb_apache::thrift::protocol::T_LIST) { - { - this->schema.clear(); - uint32_t _size171; - ::duckdb_apache::thrift::protocol::TType _etype174; - xfer += iprot->readListBegin(_etype174, _size171); - this->schema.resize(_size171); - uint32_t _i175; - for (_i175 = 0; _i175 < _size171; ++_i175) - { - xfer += this->schema[_i175].read(iprot); - } - xfer += iprot->readListEnd(); - } - isset_schema = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 3: - if (ftype == ::duckdb_apache::thrift::protocol::T_I64) { - xfer += iprot->readI64(this->num_rows); - isset_num_rows = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 4: - if (ftype == ::duckdb_apache::thrift::protocol::T_LIST) { - { - this->row_groups.clear(); - uint32_t _size176; - ::duckdb_apache::thrift::protocol::TType _etype179; - xfer += iprot->readListBegin(_etype179, _size176); - this->row_groups.resize(_size176); - uint32_t _i180; - for (_i180 = 0; _i180 < _size176; ++_i180) - { - xfer += this->row_groups[_i180].read(iprot); - } - xfer += iprot->readListEnd(); - } - isset_row_groups = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 5: - if (ftype == ::duckdb_apache::thrift::protocol::T_LIST) { - { - this->key_value_metadata.clear(); - uint32_t _size181; - ::duckdb_apache::thrift::protocol::TType _etype184; - xfer += iprot->readListBegin(_etype184, _size181); - this->key_value_metadata.resize(_size181); - uint32_t _i185; - for (_i185 = 0; _i185 < _size181; ++_i185) - { - xfer += this->key_value_metadata[_i185].read(iprot); - } - xfer += iprot->readListEnd(); - } - this->__isset.key_value_metadata = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 6: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRING) { - xfer += iprot->readString(this->created_by); - this->__isset.created_by = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 7: - if (ftype == ::duckdb_apache::thrift::protocol::T_LIST) { - { - this->column_orders.clear(); - uint32_t _size186; - ::duckdb_apache::thrift::protocol::TType _etype189; - xfer += iprot->readListBegin(_etype189, _size186); - this->column_orders.resize(_size186); - uint32_t _i190; - for (_i190 = 0; _i190 < _size186; ++_i190) - { - xfer += this->column_orders[_i190].read(iprot); - } - xfer += iprot->readListEnd(); - } - this->__isset.column_orders = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 8: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRUCT) { - xfer += this->encryption_algorithm.read(iprot); - this->__isset.encryption_algorithm = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 9: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRING) { - xfer += iprot->readBinary(this->footer_signing_key_metadata); - this->__isset.footer_signing_key_metadata = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - if (!isset_version) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_schema) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_num_rows) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_row_groups) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; -} - -uint32_t FileMetaData::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("FileMetaData"); - - xfer += oprot->writeFieldBegin("version", ::duckdb_apache::thrift::protocol::T_I32, 1); - xfer += oprot->writeI32(this->version); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("schema", ::duckdb_apache::thrift::protocol::T_LIST, 2); - { - xfer += oprot->writeListBegin(::duckdb_apache::thrift::protocol::T_STRUCT, static_cast(this->schema.size())); - std::vector ::const_iterator _iter191; - for (_iter191 = this->schema.begin(); _iter191 != this->schema.end(); ++_iter191) - { - xfer += (*_iter191).write(oprot); - } - xfer += oprot->writeListEnd(); - } - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("num_rows", ::duckdb_apache::thrift::protocol::T_I64, 3); - xfer += oprot->writeI64(this->num_rows); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("row_groups", ::duckdb_apache::thrift::protocol::T_LIST, 4); - { - xfer += oprot->writeListBegin(::duckdb_apache::thrift::protocol::T_STRUCT, static_cast(this->row_groups.size())); - std::vector ::const_iterator _iter192; - for (_iter192 = this->row_groups.begin(); _iter192 != this->row_groups.end(); ++_iter192) - { - xfer += (*_iter192).write(oprot); - } - xfer += oprot->writeListEnd(); - } - xfer += oprot->writeFieldEnd(); - - if (this->__isset.key_value_metadata) { - xfer += oprot->writeFieldBegin("key_value_metadata", ::duckdb_apache::thrift::protocol::T_LIST, 5); - { - xfer += oprot->writeListBegin(::duckdb_apache::thrift::protocol::T_STRUCT, static_cast(this->key_value_metadata.size())); - std::vector ::const_iterator _iter193; - for (_iter193 = this->key_value_metadata.begin(); _iter193 != this->key_value_metadata.end(); ++_iter193) - { - xfer += (*_iter193).write(oprot); - } - xfer += oprot->writeListEnd(); - } - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.created_by) { - xfer += oprot->writeFieldBegin("created_by", ::duckdb_apache::thrift::protocol::T_STRING, 6); - xfer += oprot->writeString(this->created_by); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.column_orders) { - xfer += oprot->writeFieldBegin("column_orders", ::duckdb_apache::thrift::protocol::T_LIST, 7); - { - xfer += oprot->writeListBegin(::duckdb_apache::thrift::protocol::T_STRUCT, static_cast(this->column_orders.size())); - std::vector ::const_iterator _iter194; - for (_iter194 = this->column_orders.begin(); _iter194 != this->column_orders.end(); ++_iter194) - { - xfer += (*_iter194).write(oprot); - } - xfer += oprot->writeListEnd(); - } - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.encryption_algorithm) { - xfer += oprot->writeFieldBegin("encryption_algorithm", ::duckdb_apache::thrift::protocol::T_STRUCT, 8); - xfer += this->encryption_algorithm.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.footer_signing_key_metadata) { - xfer += oprot->writeFieldBegin("footer_signing_key_metadata", ::duckdb_apache::thrift::protocol::T_STRING, 9); - xfer += oprot->writeBinary(this->footer_signing_key_metadata); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - -void swap(FileMetaData &a, FileMetaData &b) { - using ::std::swap; - swap(a.version, b.version); - swap(a.schema, b.schema); - swap(a.num_rows, b.num_rows); - swap(a.row_groups, b.row_groups); - swap(a.key_value_metadata, b.key_value_metadata); - swap(a.created_by, b.created_by); - swap(a.column_orders, b.column_orders); - swap(a.encryption_algorithm, b.encryption_algorithm); - swap(a.footer_signing_key_metadata, b.footer_signing_key_metadata); - swap(a.__isset, b.__isset); -} - -FileMetaData::FileMetaData(const FileMetaData& other195) { - version = other195.version; - schema = other195.schema; - num_rows = other195.num_rows; - row_groups = other195.row_groups; - key_value_metadata = other195.key_value_metadata; - created_by = other195.created_by; - column_orders = other195.column_orders; - encryption_algorithm = other195.encryption_algorithm; - footer_signing_key_metadata = other195.footer_signing_key_metadata; - __isset = other195.__isset; -} -FileMetaData& FileMetaData::operator=(const FileMetaData& other196) { - version = other196.version; - schema = other196.schema; - num_rows = other196.num_rows; - row_groups = other196.row_groups; - key_value_metadata = other196.key_value_metadata; - created_by = other196.created_by; - column_orders = other196.column_orders; - encryption_algorithm = other196.encryption_algorithm; - footer_signing_key_metadata = other196.footer_signing_key_metadata; - __isset = other196.__isset; - return *this; -} - -void FileMetaData::printTo(std::ostream& out) const { - using ::duckdb_apache::thrift::to_string; - out << "FileMetaData("; - out << "version=" << to_string(version); - out << ", " << "schema=" << to_string(schema); - out << ", " << "num_rows=" << to_string(num_rows); - out << ", " << "row_groups=" << to_string(row_groups); - out << ", " << "key_value_metadata="; (__isset.key_value_metadata ? (out << to_string(key_value_metadata)) : (out << "")); - out << ", " << "created_by="; (__isset.created_by ? (out << to_string(created_by)) : (out << "")); - out << ", " << "column_orders="; (__isset.column_orders ? (out << to_string(column_orders)) : (out << "")); - out << ", " << "encryption_algorithm="; (__isset.encryption_algorithm ? (out << to_string(encryption_algorithm)) : (out << "")); - out << ", " << "footer_signing_key_metadata="; (__isset.footer_signing_key_metadata ? (out << to_string(footer_signing_key_metadata)) : (out << "")); - out << ")"; -} - - -FileCryptoMetaData::~FileCryptoMetaData() throw() { -} - - -void FileCryptoMetaData::__set_encryption_algorithm(const EncryptionAlgorithm& val) { - this->encryption_algorithm = val; -} - -void FileCryptoMetaData::__set_key_metadata(const std::string& val) { - this->key_metadata = val; -__isset.key_metadata = true; -} -std::ostream& operator<<(std::ostream& out, const FileCryptoMetaData& obj) -{ - obj.printTo(out); - return out; -} - - -uint32_t FileCryptoMetaData::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { - - ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::duckdb_apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::duckdb_apache::thrift::protocol::TProtocolException; - - bool isset_encryption_algorithm = false; - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRUCT) { - xfer += this->encryption_algorithm.read(iprot); - isset_encryption_algorithm = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRING) { - xfer += iprot->readBinary(this->key_metadata); - this->__isset.key_metadata = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - if (!isset_encryption_algorithm) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; -} - -uint32_t FileCryptoMetaData::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("FileCryptoMetaData"); - - xfer += oprot->writeFieldBegin("encryption_algorithm", ::duckdb_apache::thrift::protocol::T_STRUCT, 1); - xfer += this->encryption_algorithm.write(oprot); - xfer += oprot->writeFieldEnd(); - - if (this->__isset.key_metadata) { - xfer += oprot->writeFieldBegin("key_metadata", ::duckdb_apache::thrift::protocol::T_STRING, 2); - xfer += oprot->writeBinary(this->key_metadata); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - -void swap(FileCryptoMetaData &a, FileCryptoMetaData &b) { - using ::std::swap; - swap(a.encryption_algorithm, b.encryption_algorithm); - swap(a.key_metadata, b.key_metadata); - swap(a.__isset, b.__isset); -} - -FileCryptoMetaData::FileCryptoMetaData(const FileCryptoMetaData& other197) { - encryption_algorithm = other197.encryption_algorithm; - key_metadata = other197.key_metadata; - __isset = other197.__isset; -} -FileCryptoMetaData& FileCryptoMetaData::operator=(const FileCryptoMetaData& other198) { - encryption_algorithm = other198.encryption_algorithm; - key_metadata = other198.key_metadata; - __isset = other198.__isset; - return *this; -} -void FileCryptoMetaData::printTo(std::ostream& out) const { - using ::duckdb_apache::thrift::to_string; - out << "FileCryptoMetaData("; - out << "encryption_algorithm=" << to_string(encryption_algorithm); - out << ", " << "key_metadata="; (__isset.key_metadata ? (out << to_string(key_metadata)) : (out << "")); - out << ")"; -} - -}} // namespace - - -// LICENSE_CHANGE_END - - -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #4 -// See the end of this file for a list - -/* ****************************************************************** - * Common functions of New Generation Entropy library - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. - * - * You can contact the author at : - * - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy - * - Public forum : https://groups.google.com/forum/#!forum/lz4c - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. -****************************************************************** */ - -/* ************************************* -* Dependencies -***************************************/ - - /* ERR_*, ERROR */ - - - - - -namespace duckdb_zstd { - -/*=== Version ===*/ -unsigned FSE_versionNumber(void) { return FSE_VERSION_NUMBER; } - - -/*=== Error Management ===*/ -unsigned FSE_isError(size_t code) { return ERR_isError(code); } -const char* FSE_getErrorName(size_t code) { return ERR_getErrorName(code); } - -unsigned HUF_isError(size_t code) { return ERR_isError(code); } -const char* HUF_getErrorName(size_t code) { return ERR_getErrorName(code); } - - -/*-************************************************************** -* FSE NCount encoding-decoding -****************************************************************/ -size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr, - const void* headerBuffer, size_t hbSize) -{ - const BYTE* const istart = (const BYTE*) headerBuffer; - const BYTE* const iend = istart + hbSize; - const BYTE* ip = istart; - int nbBits; - int remaining; - int threshold; - U32 bitStream; - int bitCount; - unsigned charnum = 0; - int previous0 = 0; - - if (hbSize < 4) { - /* This function only works when hbSize >= 4 */ - char buffer[4]; - memset(buffer, 0, sizeof(buffer)); - memcpy(buffer, headerBuffer, hbSize); - { size_t const countSize = FSE_readNCount(normalizedCounter, maxSVPtr, tableLogPtr, - buffer, sizeof(buffer)); - if (FSE_isError(countSize)) return countSize; - if (countSize > hbSize) return ERROR(corruption_detected); - return countSize; - } } - assert(hbSize >= 4); - - /* init */ - memset(normalizedCounter, 0, (*maxSVPtr+1) * sizeof(normalizedCounter[0])); /* all symbols not present in NCount have a frequency of 0 */ - bitStream = MEM_readLE32(ip); - nbBits = (bitStream & 0xF) + FSE_MIN_TABLELOG; /* extract tableLog */ - if (nbBits > FSE_TABLELOG_ABSOLUTE_MAX) return ERROR(tableLog_tooLarge); - bitStream >>= 4; - bitCount = 4; - *tableLogPtr = nbBits; - remaining = (1<1) & (charnum<=*maxSVPtr)) { - if (previous0) { - unsigned n0 = charnum; - while ((bitStream & 0xFFFF) == 0xFFFF) { - n0 += 24; - if (ip < iend-5) { - ip += 2; - bitStream = MEM_readLE32(ip) >> bitCount; - } else { - bitStream >>= 16; - bitCount += 16; - } } - while ((bitStream & 3) == 3) { - n0 += 3; - bitStream >>= 2; - bitCount += 2; - } - n0 += bitStream & 3; - bitCount += 2; - if (n0 > *maxSVPtr) return ERROR(maxSymbolValue_tooSmall); - while (charnum < n0) normalizedCounter[charnum++] = 0; - if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) { - assert((bitCount >> 3) <= 3); /* For first condition to work */ - ip += bitCount>>3; - bitCount &= 7; - bitStream = MEM_readLE32(ip) >> bitCount; - } else { - bitStream >>= 2; - } } - { int const max = (2*threshold-1) - remaining; - int count; - - if ((bitStream & (threshold-1)) < (U32)max) { - count = bitStream & (threshold-1); - bitCount += nbBits-1; - } else { - count = bitStream & (2*threshold-1); - if (count >= threshold) count -= max; - bitCount += nbBits; - } - - count--; /* extra accuracy */ - remaining -= count < 0 ? -count : count; /* -1 means +1 */ - normalizedCounter[charnum++] = (short)count; - previous0 = !count; - while (remaining < threshold) { - nbBits--; - threshold >>= 1; - } - - if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) { - ip += bitCount>>3; - bitCount &= 7; - } else { - bitCount -= (int)(8 * (iend - 4 - ip)); - ip = iend - 4; - } - bitStream = MEM_readLE32(ip) >> (bitCount & 31); - } } /* while ((remaining>1) & (charnum<=*maxSVPtr)) */ - if (remaining != 1) return ERROR(corruption_detected); - if (bitCount > 32) return ERROR(corruption_detected); - *maxSVPtr = charnum-1; - - ip += (bitCount+7)>>3; - return ip-istart; -} - - -/*! HUF_readStats() : - Read compact Huffman tree, saved by HUF_writeCTable(). - `huffWeight` is destination buffer. - `rankStats` is assumed to be a table of at least HUF_TABLELOG_MAX U32. - @return : size read from `src` , or an error Code . - Note : Needed by HUF_readCTable() and HUF_readDTableX?() . -*/ -size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats, - U32* nbSymbolsPtr, U32* tableLogPtr, - const void* src, size_t srcSize) -{ - U32 weightTotal; - const BYTE* ip = (const BYTE*) src; - size_t iSize; - size_t oSize; - - if (!srcSize) return ERROR(srcSize_wrong); - iSize = ip[0]; - /* memset(huffWeight, 0, hwSize); *//* is not necessary, even though some analyzer complain ... */ - - if (iSize >= 128) { /* special header */ - oSize = iSize - 127; - iSize = ((oSize+1)/2); - if (iSize+1 > srcSize) return ERROR(srcSize_wrong); - if (oSize >= hwSize) return ERROR(corruption_detected); - ip += 1; - { U32 n; - for (n=0; n> 4; - huffWeight[n+1] = ip[n/2] & 15; - } } } - else { /* header compressed with FSE (normal case) */ - FSE_DTable fseWorkspace[FSE_DTABLE_SIZE_U32(6)]; /* 6 is max possible tableLog for HUF header (maybe even 5, to be tested) */ - if (iSize+1 > srcSize) return ERROR(srcSize_wrong); - oSize = FSE_decompress_wksp(huffWeight, hwSize-1, ip+1, iSize, fseWorkspace, 6); /* max (hwSize-1) values decoded, as last one is implied */ - if (FSE_isError(oSize)) return oSize; - } - - /* collect weight stats */ - memset(rankStats, 0, (HUF_TABLELOG_MAX + 1) * sizeof(U32)); - weightTotal = 0; - { U32 n; for (n=0; n= HUF_TABLELOG_MAX) return ERROR(corruption_detected); - rankStats[huffWeight[n]]++; - weightTotal += (1 << huffWeight[n]) >> 1; - } } - if (weightTotal == 0) return ERROR(corruption_detected); - - /* get last non-null symbol weight (implied, total must be 2^n) */ - { U32 const tableLog = BIT_highbit32(weightTotal) + 1; - if (tableLog > HUF_TABLELOG_MAX) return ERROR(corruption_detected); - *tableLogPtr = tableLog; - /* determine last weight */ - { U32 const total = 1 << tableLog; - U32 const rest = total - weightTotal; - U32 const verif = 1 << BIT_highbit32(rest); - U32 const lastWeight = BIT_highbit32(rest) + 1; - if (verif != rest) return ERROR(corruption_detected); /* last value must be a clean power of 2 */ - huffWeight[oSize] = (BYTE)lastWeight; - rankStats[lastWeight]++; - } } - - /* check tree construction validity */ - if ((rankStats[1] < 2) || (rankStats[1] & 1)) return ERROR(corruption_detected); /* by construction : at least 2 elts of rank 1, must be even */ - - /* results */ - *nbSymbolsPtr = (U32)(oSize+1); - return iSize+1; -} - -} - - -// LICENSE_CHANGE_END - - -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #4 -// See the end of this file for a list - -/* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. - */ - -/* The purpose of this file is to have a single list of error strings embedded in binary */ - - - -namespace duckdb_zstd { - -const char* ERR_getErrorString(ERR_enum code) -{ -#ifdef ZSTD_STRIP_ERROR_STRINGS - (void)code; - return "Error strings stripped"; -#else - static const char* const notErrorCode = "Unspecified error code"; - switch( code ) - { - case PREFIX(no_error): return "No error detected"; - case PREFIX(GENERIC): return "Error (generic)"; - case PREFIX(prefix_unknown): return "Unknown frame descriptor"; - case PREFIX(version_unsupported): return "Version not supported"; - case PREFIX(frameParameter_unsupported): return "Unsupported frame parameter"; - case PREFIX(frameParameter_windowTooLarge): return "Frame requires too much memory for decoding"; - case PREFIX(corruption_detected): return "Corrupted block detected"; - case PREFIX(checksum_wrong): return "Restored data doesn't match checksum"; - case PREFIX(parameter_unsupported): return "Unsupported parameter"; - case PREFIX(parameter_outOfBound): return "Parameter is out of bound"; - case PREFIX(init_missing): return "Context should be init first"; - case PREFIX(memory_allocation): return "Allocation error : not enough memory"; - case PREFIX(workSpace_tooSmall): return "workSpace buffer is not large enough"; - case PREFIX(stage_wrong): return "Operation not authorized at current processing stage"; - case PREFIX(tableLog_tooLarge): return "tableLog requires too much memory : unsupported"; - case PREFIX(maxSymbolValue_tooLarge): return "Unsupported max Symbol Value : too large"; - case PREFIX(maxSymbolValue_tooSmall): return "Specified maxSymbolValue is too small"; - case PREFIX(dictionary_corrupted): return "Dictionary is corrupted"; - case PREFIX(dictionary_wrong): return "Dictionary mismatch"; - case PREFIX(dictionaryCreation_failed): return "Cannot create Dictionary from provided samples"; - case PREFIX(dstSize_tooSmall): return "Destination buffer is too small"; - case PREFIX(srcSize_wrong): return "Src size is incorrect"; - case PREFIX(dstBuffer_null): return "Operation on NULL destination buffer"; - /* following error codes are not stable and may be removed or changed in a future version */ - case PREFIX(frameIndex_tooLarge): return "Frame index is too large"; - case PREFIX(seekableIO): return "An I/O error occurred when reading/seeking"; - case PREFIX(dstBuffer_wrong): return "Destination buffer is wrong"; - case PREFIX(maxCode): - default: return notErrorCode; - } -#endif -} - -} - - -// LICENSE_CHANGE_END - - -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #4 -// See the end of this file for a list - -/* ****************************************************************** - * FSE : Finite State Entropy decoder - * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc. - * - * You can contact the author at : - * - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy - * - Public forum : https://groups.google.com/forum/#!forum/lz4c - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. -****************************************************************** */ - - -/* ************************************************************** -* Includes -****************************************************************/ -#include /* malloc, free, qsort */ -#include /* memcpy, memset */ - - - - - - - -/* ************************************************************** -* Error Management -****************************************************************/ -// #define FSE_isError ERR_isError -#define FSE_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c) /* use only *after* variable declarations */ - - -/* ************************************************************** -* Templates -****************************************************************/ -/* - designed to be included - for type-specific functions (template emulation in C) - Objective is to write these functions only once, for improved maintenance -*/ - -/* safety checks */ -#ifndef FSE_FUNCTION_EXTENSION -# error "FSE_FUNCTION_EXTENSION must be defined" -#endif -#ifndef FSE_FUNCTION_TYPE -# error "FSE_FUNCTION_TYPE must be defined" -#endif - -/* Function names */ -#define FSE_CAT(X,Y) X##Y -#define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y) -#define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y) - -namespace duckdb_zstd { - -/* Function templates */ -FSE_DTable* FSE_createDTable (unsigned tableLog) -{ - if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX; - return (FSE_DTable*)malloc( FSE_DTABLE_SIZE_U32(tableLog) * sizeof (U32) ); -} - -void FSE_freeDTable (FSE_DTable* dt) -{ - free(dt); -} - -size_t FSE_buildDTable(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog) -{ - void* const tdPtr = dt+1; /* because *dt is unsigned, 32-bits aligned on 32-bits */ - FSE_DECODE_TYPE* const tableDecode = (FSE_DECODE_TYPE*) (tdPtr); - U16 symbolNext[FSE_MAX_SYMBOL_VALUE+1]; - - U32 const maxSV1 = maxSymbolValue + 1; - U32 const tableSize = 1 << tableLog; - U32 highThreshold = tableSize-1; - - /* Sanity Checks */ - if (maxSymbolValue > FSE_MAX_SYMBOL_VALUE) return ERROR(maxSymbolValue_tooLarge); - if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge); - - /* Init, lay down lowprob symbols */ - { FSE_DTableHeader DTableH; - DTableH.tableLog = (U16)tableLog; - DTableH.fastMode = 1; - { S16 const largeLimit= (S16)(1 << (tableLog-1)); - U32 s; - for (s=0; s= largeLimit) DTableH.fastMode=0; - symbolNext[s] = normalizedCounter[s]; - } } } - memcpy(dt, &DTableH, sizeof(DTableH)); - } - - /* Spread symbols */ - { U32 const tableMask = tableSize-1; - U32 const step = FSE_TABLESTEP(tableSize); - U32 s, position = 0; - for (s=0; s highThreshold) position = (position + step) & tableMask; /* lowprob area */ - } } - if (position!=0) return ERROR(GENERIC); /* position must reach all cells once, otherwise normalizedCounter is incorrect */ - } - - /* Build Decoding table */ - { U32 u; - for (u=0; utableLog = 0; - DTableH->fastMode = 0; - - cell->newState = 0; - cell->symbol = symbolValue; - cell->nbBits = 0; - - return 0; -} - - -size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits) -{ - void* ptr = dt; - FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr; - void* dPtr = dt + 1; - FSE_decode_t* const dinfo = (FSE_decode_t*)dPtr; - const unsigned tableSize = 1 << nbBits; - const unsigned tableMask = tableSize - 1; - const unsigned maxSV1 = tableMask+1; - unsigned s; - - /* Sanity checks */ - if (nbBits < 1) return ERROR(GENERIC); /* min size */ - - /* Build Decoding Table */ - DTableH->tableLog = (U16)nbBits; - DTableH->fastMode = 1; - for (s=0; s sizeof(bitD.bitContainer)*8) /* This test must be static */ - BIT_reloadDStream(&bitD); - - op[1] = FSE_GETSYMBOL(&state2); - - if (FSE_MAX_TABLELOG*4+7 > sizeof(bitD.bitContainer)*8) /* This test must be static */ - { if (BIT_reloadDStream(&bitD) > BIT_DStream_unfinished) { op+=2; break; } } - - op[2] = FSE_GETSYMBOL(&state1); - - if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8) /* This test must be static */ - BIT_reloadDStream(&bitD); - - op[3] = FSE_GETSYMBOL(&state2); - } - - /* tail */ - /* note : BIT_reloadDStream(&bitD) >= FSE_DStream_partiallyFilled; Ends at exactly BIT_DStream_completed */ - while (1) { - if (op>(omax-2)) return ERROR(dstSize_tooSmall); - *op++ = FSE_GETSYMBOL(&state1); - if (BIT_reloadDStream(&bitD)==BIT_DStream_overflow) { - *op++ = FSE_GETSYMBOL(&state2); - break; - } - - if (op>(omax-2)) return ERROR(dstSize_tooSmall); - *op++ = FSE_GETSYMBOL(&state2); - if (BIT_reloadDStream(&bitD)==BIT_DStream_overflow) { - *op++ = FSE_GETSYMBOL(&state1); - break; - } } - - return op-ostart; -} - - -size_t FSE_decompress_usingDTable(void* dst, size_t originalSize, - const void* cSrc, size_t cSrcSize, - const FSE_DTable* dt) -{ - const void* ptr = dt; - const FSE_DTableHeader* DTableH = (const FSE_DTableHeader*)ptr; - const U32 fastMode = DTableH->fastMode; - - /* select fast mode (static) */ - if (fastMode) return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 1); - return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 0); -} - - -size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, FSE_DTable* workSpace, unsigned maxLog) -{ - const BYTE* const istart = (const BYTE*)cSrc; - const BYTE* ip = istart; - short counting[FSE_MAX_SYMBOL_VALUE+1]; - unsigned tableLog; - unsigned maxSymbolValue = FSE_MAX_SYMBOL_VALUE; - - /* normal FSE decoding mode */ - size_t const NCountLength = FSE_readNCount (counting, &maxSymbolValue, &tableLog, istart, cSrcSize); - if (FSE_isError(NCountLength)) return NCountLength; - /* if (NCountLength >= cSrcSize) return ERROR(srcSize_wrong); */ /* too small input size; supposed to be already checked in NCountLength, only remaining case : NCountLength==cSrcSize */ - if (tableLog > maxLog) return ERROR(tableLog_tooLarge); - ip += NCountLength; - cSrcSize -= NCountLength; - - CHECK_F( FSE_buildDTable (workSpace, counting, maxSymbolValue, tableLog) ); - - return FSE_decompress_usingDTable (dst, dstCapacity, ip, cSrcSize, workSpace); /* always return, even if it is an error code */ -} - - -typedef FSE_DTable DTable_max_t[FSE_DTABLE_SIZE_U32(FSE_MAX_TABLELOG)]; - -size_t FSE_decompress(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize) -{ - DTable_max_t dt; /* Static analyzer seems unable to understand this table will be properly initialized later */ - return FSE_decompress_wksp(dst, dstCapacity, cSrc, cSrcSize, dt, FSE_MAX_TABLELOG); -} - -} - -#endif /* FSE_COMMONDEFS_ONLY */ - - -// LICENSE_CHANGE_END - - -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #4 -// See the end of this file for a list - -/* - * xxHash - Fast Hash algorithm - * Copyright (c) 2012-2020, Yann Collet, Facebook, Inc. - * - * You can contact the author at : - * - xxHash homepage: http://www.xxhash.com - * - xxHash source repository : https://github.com/Cyan4973/xxHash - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. -*/ - - -/* ************************************* -* Tuning parameters -***************************************/ -/*!XXH_FORCE_MEMORY_ACCESS : - * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable. - * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal. - * The below switch allow to select different access method for improved performance. - * Method 0 (default) : use `memcpy()`. Safe and portable. - * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable). - * This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`. - * Method 2 : direct access. This method doesn't depend on compiler but violate C standard. - * It can generate buggy code on targets which do not support unaligned memory accesses. - * But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6) - * See http://stackoverflow.com/a/32095106/646947 for details. - * Prefer these methods in priority order (0 > 1 > 2) - */ -#ifndef XXH_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */ -# if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) ) -# define XXH_FORCE_MEMORY_ACCESS 2 -# elif (defined(__INTEL_COMPILER) && !defined(WIN32)) || \ - (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) )) || \ - defined(__ICCARM__) -# define XXH_FORCE_MEMORY_ACCESS 1 -# endif -#endif - -/*!XXH_ACCEPT_NULL_INPUT_POINTER : - * If the input pointer is a null pointer, xxHash default behavior is to trigger a memory access error, since it is a bad pointer. - * When this option is enabled, xxHash output for null input pointers will be the same as a null-length input. - * By default, this option is disabled. To enable it, uncomment below define : - */ -/* #define XXH_ACCEPT_NULL_INPUT_POINTER 1 */ - -/*!XXH_FORCE_NATIVE_FORMAT : - * By default, xxHash library provides endian-independent Hash values, based on little-endian convention. - * Results are therefore identical for little-endian and big-endian CPU. - * This comes at a performance cost for big-endian CPU, since some swapping is required to emulate little-endian format. - * Should endian-independence be of no importance for your application, you may set the #define below to 1, - * to improve speed for Big-endian CPU. - * This option has no impact on Little_Endian CPU. - */ -#ifndef XXH_FORCE_NATIVE_FORMAT /* can be defined externally */ -# define XXH_FORCE_NATIVE_FORMAT 0 -#endif - -/*!XXH_FORCE_ALIGN_CHECK : - * This is a minor performance trick, only useful with lots of very small keys. - * It means : check for aligned/unaligned input. - * The check costs one initial branch per hash; set to 0 when the input data - * is guaranteed to be aligned. - */ -#ifndef XXH_FORCE_ALIGN_CHECK /* can be defined externally */ -# if defined(__i386) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64) -# define XXH_FORCE_ALIGN_CHECK 0 -# else -# define XXH_FORCE_ALIGN_CHECK 1 -# endif -#endif - - -/* ************************************* -* Includes & Memory related functions -***************************************/ -/* Modify the local functions below should you wish to use some other memory routines */ -/* for malloc(), free() */ -#include -#include /* size_t */ -/* for memcpy() */ -#include - - - - -/* ************************************* -* Compiler Specific Options -***************************************/ -#if (defined(__GNUC__) && !defined(__STRICT_ANSI__)) || defined(__cplusplus) || defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */ -# define INLINE_KEYWORD inline -#else -# define INLINE_KEYWORD -#endif - -#if defined(__GNUC__) || defined(__ICCARM__) -# define FORCE_INLINE_ATTR __attribute__((always_inline)) -#elif defined(_MSC_VER) -# define FORCE_INLINE_ATTR __forceinline -#else -# define FORCE_INLINE_ATTR -#endif - -#define FORCE_INLINE_TEMPLATE static INLINE_KEYWORD FORCE_INLINE_ATTR - - -/* ************************************* -* Basic Types -***************************************/ -#ifndef MEM_MODULE -# define MEM_MODULE -# if !defined (__VMS) && (defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) ) -# include - typedef uint8_t BYTE; - typedef uint16_t U16; - typedef uint32_t U32; - typedef int32_t S32; - typedef uint64_t U64; -# else - typedef unsigned char BYTE; - typedef unsigned short U16; - typedef unsigned int U32; - typedef signed int S32; - typedef unsigned long long U64; /* if your compiler doesn't support unsigned long long, replace by another 64-bit type here. Note that xxhash.h will also need to be updated. */ -# endif -#endif - -namespace duckdb_zstd { -static void* XXH_malloc(size_t s) { return malloc(s); } -static void XXH_free (void* p) { free(p); } -static void* XXH_memcpy(void* dest, const void* src, size_t size) { return memcpy(dest,src,size); } - -#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2)) - -/* Force direct memory access. Only works on CPU which support unaligned memory access in hardware */ -static U32 XXH_read32(const void* memPtr) { return *(const U32*) memPtr; } -static U64 XXH_read64(const void* memPtr) { return *(const U64*) memPtr; } - -#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1)) - -/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */ -/* currently only defined for gcc and icc */ -typedef union { U32 u32; U64 u64; } __attribute__((packed)) unalign; - -static U32 XXH_read32(const void* ptr) { return ((const unalign*)ptr)->u32; } -static U64 XXH_read64(const void* ptr) { return ((const unalign*)ptr)->u64; } - -#else - -/* portable and safe solution. Generally efficient. - * see : http://stackoverflow.com/a/32095106/646947 - */ - -static U32 XXH_read32(const void* memPtr) -{ - U32 val; - memcpy(&val, memPtr, sizeof(val)); - return val; -} - -static U64 XXH_read64(const void* memPtr) -{ - U64 val; - memcpy(&val, memPtr, sizeof(val)); - return val; -} - -#endif /* XXH_FORCE_DIRECT_MEMORY_ACCESS */ - - -/* **************************************** -* Compiler-specific Functions and Macros -******************************************/ -#define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) - -/* Note : although _rotl exists for minGW (GCC under windows), performance seems poor */ -#if defined(_MSC_VER) -# define XXH_rotl32(x,r) _rotl(x,r) -# define XXH_rotl64(x,r) _rotl64(x,r) -#else -#if defined(__ICCARM__) -# include -# define XXH_rotl32(x,r) __ROR(x,(32 - r)) -#else -# define XXH_rotl32(x,r) ((x << r) | (x >> (32 - r))) -#endif -# define XXH_rotl64(x,r) ((x << r) | (x >> (64 - r))) -#endif - -#if defined(_MSC_VER) /* Visual Studio */ -# define XXH_swap32 _byteswap_ulong -# define XXH_swap64 _byteswap_uint64 -#elif GCC_VERSION >= 403 -# define XXH_swap32 __builtin_bswap32 -# define XXH_swap64 __builtin_bswap64 -#else -static U32 XXH_swap32 (U32 x) -{ - return ((x << 24) & 0xff000000 ) | - ((x << 8) & 0x00ff0000 ) | - ((x >> 8) & 0x0000ff00 ) | - ((x >> 24) & 0x000000ff ); -} -static U64 XXH_swap64 (U64 x) -{ - return ((x << 56) & 0xff00000000000000ULL) | - ((x << 40) & 0x00ff000000000000ULL) | - ((x << 24) & 0x0000ff0000000000ULL) | - ((x << 8) & 0x000000ff00000000ULL) | - ((x >> 8) & 0x00000000ff000000ULL) | - ((x >> 24) & 0x0000000000ff0000ULL) | - ((x >> 40) & 0x000000000000ff00ULL) | - ((x >> 56) & 0x00000000000000ffULL); -} -#endif - - -/* ************************************* -* Architecture Macros -***************************************/ -typedef enum { XXH_bigEndian=0, XXH_littleEndian=1 } XXH_endianess; - -/* XXH_CPU_LITTLE_ENDIAN can be defined externally, for example on the compiler command line */ -#ifndef XXH_CPU_LITTLE_ENDIAN - static const int g_one = 1; -# define XXH_CPU_LITTLE_ENDIAN (*(const char*)(&g_one)) -#endif - - -/* *************************** -* Memory reads -*****************************/ -typedef enum { XXH_aligned, XXH_unaligned } XXH_alignment; - -FORCE_INLINE_TEMPLATE U32 XXH_readLE32_align(const void* ptr, XXH_endianess endian, XXH_alignment align) -{ - if (align==XXH_unaligned) - return endian==XXH_littleEndian ? XXH_read32(ptr) : XXH_swap32(XXH_read32(ptr)); - else - return endian==XXH_littleEndian ? *(const U32*)ptr : XXH_swap32(*(const U32*)ptr); -} - -FORCE_INLINE_TEMPLATE U32 XXH_readLE32(const void* ptr, XXH_endianess endian) -{ - return XXH_readLE32_align(ptr, endian, XXH_unaligned); -} - -static U32 XXH_readBE32(const void* ptr) -{ - return XXH_CPU_LITTLE_ENDIAN ? XXH_swap32(XXH_read32(ptr)) : XXH_read32(ptr); -} - -FORCE_INLINE_TEMPLATE U64 XXH_readLE64_align(const void* ptr, XXH_endianess endian, XXH_alignment align) -{ - if (align==XXH_unaligned) - return endian==XXH_littleEndian ? XXH_read64(ptr) : XXH_swap64(XXH_read64(ptr)); - else - return endian==XXH_littleEndian ? *(const U64*)ptr : XXH_swap64(*(const U64*)ptr); -} - -FORCE_INLINE_TEMPLATE U64 XXH_readLE64(const void* ptr, XXH_endianess endian) -{ - return XXH_readLE64_align(ptr, endian, XXH_unaligned); -} - -static U64 XXH_readBE64(const void* ptr) -{ - return XXH_CPU_LITTLE_ENDIAN ? XXH_swap64(XXH_read64(ptr)) : XXH_read64(ptr); -} - - -/* ************************************* -* Macros -***************************************/ -#define XXH_STATIC_ASSERT(c) { enum { XXH_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */ - - -/* ************************************* -* Constants -***************************************/ -static const U32 PRIME32_1 = 2654435761U; -static const U32 PRIME32_2 = 2246822519U; -static const U32 PRIME32_3 = 3266489917U; -static const U32 PRIME32_4 = 668265263U; -static const U32 PRIME32_5 = 374761393U; - -static const U64 PRIME64_1 = 11400714785074694791ULL; -static const U64 PRIME64_2 = 14029467366897019727ULL; -static const U64 PRIME64_3 = 1609587929392839161ULL; -static const U64 PRIME64_4 = 9650029242287828579ULL; -static const U64 PRIME64_5 = 2870177450012600261ULL; - -XXH_PUBLIC_API unsigned XXH_versionNumber (void) { return XXH_VERSION_NUMBER; } - - -/* ************************** -* Utils -****************************/ -XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* __restrict dstState, const XXH32_state_t* __restrict srcState) -{ - memcpy(dstState, srcState, sizeof(*dstState)); -} - -XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* __restrict dstState, const XXH64_state_t* __restrict srcState) -{ - memcpy(dstState, srcState, sizeof(*dstState)); -} - - -/* *************************** -* Simple Hash Functions -*****************************/ - -static U32 XXH32_round(U32 seed, U32 input) -{ - seed += input * PRIME32_2; - seed = XXH_rotl32(seed, 13); - seed *= PRIME32_1; - return seed; -} - -FORCE_INLINE_TEMPLATE U32 XXH32_endian_align(const void* input, size_t len, U32 seed, XXH_endianess endian, XXH_alignment align) -{ - const BYTE* p = (const BYTE*)input; - const BYTE* bEnd = p + len; - U32 h32; -#define XXH_get32bits(p) XXH_readLE32_align(p, endian, align) - -#ifdef XXH_ACCEPT_NULL_INPUT_POINTER - if (p==NULL) { - len=0; - bEnd=p=(const BYTE*)(size_t)16; - } -#endif - - if (len>=16) { - const BYTE* const limit = bEnd - 16; - U32 v1 = seed + PRIME32_1 + PRIME32_2; - U32 v2 = seed + PRIME32_2; - U32 v3 = seed + 0; - U32 v4 = seed - PRIME32_1; - - do { - v1 = XXH32_round(v1, XXH_get32bits(p)); p+=4; - v2 = XXH32_round(v2, XXH_get32bits(p)); p+=4; - v3 = XXH32_round(v3, XXH_get32bits(p)); p+=4; - v4 = XXH32_round(v4, XXH_get32bits(p)); p+=4; - } while (p<=limit); - - h32 = XXH_rotl32(v1, 1) + XXH_rotl32(v2, 7) + XXH_rotl32(v3, 12) + XXH_rotl32(v4, 18); - } else { - h32 = seed + PRIME32_5; - } - - h32 += (U32) len; - - while (p+4<=bEnd) { - h32 += XXH_get32bits(p) * PRIME32_3; - h32 = XXH_rotl32(h32, 17) * PRIME32_4 ; - p+=4; - } - - while (p> 15; - h32 *= PRIME32_2; - h32 ^= h32 >> 13; - h32 *= PRIME32_3; - h32 ^= h32 >> 16; - - return h32; -} - - -XXH_PUBLIC_API unsigned int XXH32 (const void* input, size_t len, unsigned int seed) -{ -#if 0 - /* Simple version, good for code maintenance, but unfortunately slow for small inputs */ - XXH32_CREATESTATE_STATIC(state); - XXH32_reset(state, seed); - XXH32_update(state, input, len); - return XXH32_digest(state); -#else - XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; - - if (XXH_FORCE_ALIGN_CHECK) { - if ((((size_t)input) & 3) == 0) { /* Input is 4-bytes aligned, leverage the speed benefit */ - if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) - return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned); - else - return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned); - } } - - if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) - return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned); - else - return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned); -#endif -} - - -static U64 XXH64_round(U64 acc, U64 input) -{ - acc += input * PRIME64_2; - acc = XXH_rotl64(acc, 31); - acc *= PRIME64_1; - return acc; -} - -static U64 XXH64_mergeRound(U64 acc, U64 val) -{ - val = XXH64_round(0, val); - acc ^= val; - acc = acc * PRIME64_1 + PRIME64_4; - return acc; -} - -FORCE_INLINE_TEMPLATE U64 XXH64_endian_align(const void* input, size_t len, U64 seed, XXH_endianess endian, XXH_alignment align) -{ - const BYTE* p = (const BYTE*)input; - const BYTE* const bEnd = p + len; - U64 h64; -#define XXH_get64bits(p) XXH_readLE64_align(p, endian, align) - -#ifdef XXH_ACCEPT_NULL_INPUT_POINTER - if (p==NULL) { - len=0; - bEnd=p=(const BYTE*)(size_t)32; - } -#endif - - if (len>=32) { - const BYTE* const limit = bEnd - 32; - U64 v1 = seed + PRIME64_1 + PRIME64_2; - U64 v2 = seed + PRIME64_2; - U64 v3 = seed + 0; - U64 v4 = seed - PRIME64_1; - - do { - v1 = XXH64_round(v1, XXH_get64bits(p)); p+=8; - v2 = XXH64_round(v2, XXH_get64bits(p)); p+=8; - v3 = XXH64_round(v3, XXH_get64bits(p)); p+=8; - v4 = XXH64_round(v4, XXH_get64bits(p)); p+=8; - } while (p<=limit); - - h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18); - h64 = XXH64_mergeRound(h64, v1); - h64 = XXH64_mergeRound(h64, v2); - h64 = XXH64_mergeRound(h64, v3); - h64 = XXH64_mergeRound(h64, v4); - - } else { - h64 = seed + PRIME64_5; - } - - h64 += (U64) len; - - while (p+8<=bEnd) { - U64 const k1 = XXH64_round(0, XXH_get64bits(p)); - h64 ^= k1; - h64 = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4; - p+=8; - } - - if (p+4<=bEnd) { - h64 ^= (U64)(XXH_get32bits(p)) * PRIME64_1; - h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3; - p+=4; - } - - while (p> 33; - h64 *= PRIME64_2; - h64 ^= h64 >> 29; - h64 *= PRIME64_3; - h64 ^= h64 >> 32; - - return h64; -} - - -XXH_PUBLIC_API unsigned long long XXH64 (const void* input, size_t len, unsigned long long seed) -{ -#if 0 - /* Simple version, good for code maintenance, but unfortunately slow for small inputs */ - XXH64_CREATESTATE_STATIC(state); - XXH64_reset(state, seed); - XXH64_update(state, input, len); - return XXH64_digest(state); -#else - XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; - - if (XXH_FORCE_ALIGN_CHECK) { - if ((((size_t)input) & 7)==0) { /* Input is aligned, let's leverage the speed advantage */ - if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) - return XXH64_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned); - else - return XXH64_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned); - } } - - if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) - return XXH64_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned); - else - return XXH64_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned); -#endif -} - - -/* ************************************************** -* Advanced Hash Functions -****************************************************/ - -XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void) -{ - return (XXH32_state_t*)XXH_malloc(sizeof(XXH32_state_t)); -} -XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr) -{ - XXH_free(statePtr); - return XXH_OK; -} - -XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void) -{ - return (XXH64_state_t*)XXH_malloc(sizeof(XXH64_state_t)); -} -XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr) -{ - XXH_free(statePtr); - return XXH_OK; -} - - -/*** Hash feed ***/ - -XXH_PUBLIC_API XXH_errorcode XXH32_reset(XXH32_state_t* statePtr, unsigned int seed) -{ - XXH32_state_t state; /* using a local state to memcpy() in order to avoid strict-aliasing warnings */ - memset(&state, 0, sizeof(state)-4); /* do not write into reserved, for future removal */ - state.v1 = seed + PRIME32_1 + PRIME32_2; - state.v2 = seed + PRIME32_2; - state.v3 = seed + 0; - state.v4 = seed - PRIME32_1; - memcpy(statePtr, &state, sizeof(state)); - return XXH_OK; -} - - -XXH_PUBLIC_API XXH_errorcode XXH64_reset(XXH64_state_t* statePtr, unsigned long long seed) -{ - XXH64_state_t state; /* using a local state to memcpy() in order to avoid strict-aliasing warnings */ - memset(&state, 0, sizeof(state)-8); /* do not write into reserved, for future removal */ - state.v1 = seed + PRIME64_1 + PRIME64_2; - state.v2 = seed + PRIME64_2; - state.v3 = seed + 0; - state.v4 = seed - PRIME64_1; - memcpy(statePtr, &state, sizeof(state)); - return XXH_OK; -} - - -FORCE_INLINE_TEMPLATE XXH_errorcode XXH32_update_endian (XXH32_state_t* state, const void* input, size_t len, XXH_endianess endian) -{ - const BYTE* p = (const BYTE*)input; - const BYTE* const bEnd = p + len; - -#ifdef XXH_ACCEPT_NULL_INPUT_POINTER - if (input==NULL) return XXH_ERROR; -#endif - - state->total_len_32 += (unsigned)len; - state->large_len |= (len>=16) | (state->total_len_32>=16); - - if (state->memsize + len < 16) { /* fill in tmp buffer */ - XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, len); - state->memsize += (unsigned)len; - return XXH_OK; - } - - if (state->memsize) { /* some data left from previous update */ - XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, 16-state->memsize); - { const U32* p32 = state->mem32; - state->v1 = XXH32_round(state->v1, XXH_readLE32(p32, endian)); p32++; - state->v2 = XXH32_round(state->v2, XXH_readLE32(p32, endian)); p32++; - state->v3 = XXH32_round(state->v3, XXH_readLE32(p32, endian)); p32++; - state->v4 = XXH32_round(state->v4, XXH_readLE32(p32, endian)); p32++; - } - p += 16-state->memsize; - state->memsize = 0; - } - - if (p <= bEnd-16) { - const BYTE* const limit = bEnd - 16; - U32 v1 = state->v1; - U32 v2 = state->v2; - U32 v3 = state->v3; - U32 v4 = state->v4; - - do { - v1 = XXH32_round(v1, XXH_readLE32(p, endian)); p+=4; - v2 = XXH32_round(v2, XXH_readLE32(p, endian)); p+=4; - v3 = XXH32_round(v3, XXH_readLE32(p, endian)); p+=4; - v4 = XXH32_round(v4, XXH_readLE32(p, endian)); p+=4; - } while (p<=limit); - - state->v1 = v1; - state->v2 = v2; - state->v3 = v3; - state->v4 = v4; - } - - if (p < bEnd) { - XXH_memcpy(state->mem32, p, (size_t)(bEnd-p)); - state->memsize = (unsigned)(bEnd-p); - } - - return XXH_OK; -} - -XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* state_in, const void* input, size_t len) -{ - XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; - - if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) - return XXH32_update_endian(state_in, input, len, XXH_littleEndian); - else - return XXH32_update_endian(state_in, input, len, XXH_bigEndian); -} - - - -FORCE_INLINE_TEMPLATE U32 XXH32_digest_endian (const XXH32_state_t* state, XXH_endianess endian) -{ - const BYTE * p = (const BYTE*)state->mem32; - const BYTE* const bEnd = (const BYTE*)(state->mem32) + state->memsize; - U32 h32; - - if (state->large_len) { - h32 = XXH_rotl32(state->v1, 1) + XXH_rotl32(state->v2, 7) + XXH_rotl32(state->v3, 12) + XXH_rotl32(state->v4, 18); - } else { - h32 = state->v3 /* == seed */ + PRIME32_5; - } - - h32 += state->total_len_32; - - while (p+4<=bEnd) { - h32 += XXH_readLE32(p, endian) * PRIME32_3; - h32 = XXH_rotl32(h32, 17) * PRIME32_4; - p+=4; - } - - while (p> 15; - h32 *= PRIME32_2; - h32 ^= h32 >> 13; - h32 *= PRIME32_3; - h32 ^= h32 >> 16; - - return h32; -} - - -XXH_PUBLIC_API unsigned int XXH32_digest (const XXH32_state_t* state_in) -{ - XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; - - if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) - return XXH32_digest_endian(state_in, XXH_littleEndian); - else - return XXH32_digest_endian(state_in, XXH_bigEndian); -} - - - -/* **** XXH64 **** */ - -FORCE_INLINE_TEMPLATE XXH_errorcode XXH64_update_endian (XXH64_state_t* state, const void* input, size_t len, XXH_endianess endian) -{ - const BYTE* p = (const BYTE*)input; - const BYTE* const bEnd = p + len; - -#ifdef XXH_ACCEPT_NULL_INPUT_POINTER - if (input==NULL) return XXH_ERROR; -#endif - - state->total_len += len; - - if (state->memsize + len < 32) { /* fill in tmp buffer */ - if (input != NULL) { - XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, len); - } - state->memsize += (U32)len; - return XXH_OK; - } - - if (state->memsize) { /* tmp buffer is full */ - XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, 32-state->memsize); - state->v1 = XXH64_round(state->v1, XXH_readLE64(state->mem64+0, endian)); - state->v2 = XXH64_round(state->v2, XXH_readLE64(state->mem64+1, endian)); - state->v3 = XXH64_round(state->v3, XXH_readLE64(state->mem64+2, endian)); - state->v4 = XXH64_round(state->v4, XXH_readLE64(state->mem64+3, endian)); - p += 32-state->memsize; - state->memsize = 0; - } - - if (p+32 <= bEnd) { - const BYTE* const limit = bEnd - 32; - U64 v1 = state->v1; - U64 v2 = state->v2; - U64 v3 = state->v3; - U64 v4 = state->v4; - - do { - v1 = XXH64_round(v1, XXH_readLE64(p, endian)); p+=8; - v2 = XXH64_round(v2, XXH_readLE64(p, endian)); p+=8; - v3 = XXH64_round(v3, XXH_readLE64(p, endian)); p+=8; - v4 = XXH64_round(v4, XXH_readLE64(p, endian)); p+=8; - } while (p<=limit); - - state->v1 = v1; - state->v2 = v2; - state->v3 = v3; - state->v4 = v4; - } - - if (p < bEnd) { - XXH_memcpy(state->mem64, p, (size_t)(bEnd-p)); - state->memsize = (unsigned)(bEnd-p); - } - - return XXH_OK; -} - -XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* state_in, const void* input, size_t len) -{ - XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; - - if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) - return XXH64_update_endian(state_in, input, len, XXH_littleEndian); - else - return XXH64_update_endian(state_in, input, len, XXH_bigEndian); -} - - - -FORCE_INLINE_TEMPLATE U64 XXH64_digest_endian (const XXH64_state_t* state, XXH_endianess endian) -{ - const BYTE * p = (const BYTE*)state->mem64; - const BYTE* const bEnd = (const BYTE*)state->mem64 + state->memsize; - U64 h64; - - if (state->total_len >= 32) { - U64 const v1 = state->v1; - U64 const v2 = state->v2; - U64 const v3 = state->v3; - U64 const v4 = state->v4; - - h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18); - h64 = XXH64_mergeRound(h64, v1); - h64 = XXH64_mergeRound(h64, v2); - h64 = XXH64_mergeRound(h64, v3); - h64 = XXH64_mergeRound(h64, v4); - } else { - h64 = state->v3 + PRIME64_5; - } - - h64 += (U64) state->total_len; - - while (p+8<=bEnd) { - U64 const k1 = XXH64_round(0, XXH_readLE64(p, endian)); - h64 ^= k1; - h64 = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4; - p+=8; - } - - if (p+4<=bEnd) { - h64 ^= (U64)(XXH_readLE32(p, endian)) * PRIME64_1; - h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3; - p+=4; - } - - while (p> 33; - h64 *= PRIME64_2; - h64 ^= h64 >> 29; - h64 *= PRIME64_3; - h64 ^= h64 >> 32; - - return h64; -} - - -XXH_PUBLIC_API unsigned long long XXH64_digest (const XXH64_state_t* state_in) -{ - XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; - - if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) - return XXH64_digest_endian(state_in, XXH_littleEndian); - else - return XXH64_digest_endian(state_in, XXH_bigEndian); -} - - -/* ************************** -* Canonical representation -****************************/ - -/*! Default XXH result types are basic unsigned 32 and 64 bits. -* The canonical representation follows human-readable write convention, aka big-endian (large digits first). -* These functions allow transformation of hash result into and from its canonical format. -* This way, hash values can be written into a file or buffer, and remain comparable across different systems and programs. -*/ - -XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash) -{ - XXH_STATIC_ASSERT(sizeof(XXH32_canonical_t) == sizeof(XXH32_hash_t)); - if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap32(hash); - memcpy(dst, &hash, sizeof(*dst)); -} - -XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash) -{ - XXH_STATIC_ASSERT(sizeof(XXH64_canonical_t) == sizeof(XXH64_hash_t)); - if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap64(hash); - memcpy(dst, &hash, sizeof(*dst)); -} - -XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src) -{ - return XXH_readBE32(src); -} - -XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src) -{ - return XXH_readBE64(src); -} - -} - - -// LICENSE_CHANGE_END - - -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #4 -// See the end of this file for a list - -/* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. - */ - - - -/*-************************************* -* Dependencies -***************************************/ -#include /* malloc, calloc, free */ -#include /* memset */ - - - -namespace duckdb_zstd { - -/*-**************************************** -* Version -******************************************/ -unsigned ZSTD_versionNumber(void) { return ZSTD_VERSION_NUMBER; } - -const char* ZSTD_versionString(void) { return ZSTD_VERSION_STRING; } - - -/*-**************************************** -* ZSTD Error Management -******************************************/ -#undef ZSTD_isError /* defined within zstd_internal.h */ -/*! ZSTD_isError() : - * tells if a return value is an error code - * symbol is required for external callers */ -unsigned ZSTD_isError(size_t code) { return ERR_isError(code); } - -/*! ZSTD_getErrorName() : - * provides error code string from function result (useful for debugging) */ -const char* ZSTD_getErrorName(size_t code) { return ERR_getErrorName(code); } - -/*! ZSTD_getError() : - * convert a `size_t` function result into a proper ZSTD_errorCode enum */ -ZSTD_ErrorCode ZSTD_getErrorCode(size_t code) { return ERR_getErrorCode(code); } - -/*! ZSTD_getErrorString() : - * provides error code string from enum */ -const char* ZSTD_getErrorString(ZSTD_ErrorCode code) { return ERR_getErrorString(code); } - - - -/*=************************************************************** -* Custom allocator -****************************************************************/ -void* ZSTD_malloc(size_t size, ZSTD_customMem customMem) -{ - if (customMem.customAlloc) - return customMem.customAlloc(customMem.opaque, size); - return malloc(size); -} - -void* ZSTD_calloc(size_t size, ZSTD_customMem customMem) -{ - if (customMem.customAlloc) { - /* calloc implemented as malloc+memset; - * not as efficient as calloc, but next best guess for custom malloc */ - void* const ptr = customMem.customAlloc(customMem.opaque, size); - memset(ptr, 0, size); - return ptr; - } - return calloc(1, size); -} - -void ZSTD_free(void* ptr, ZSTD_customMem customMem) -{ - if (ptr!=NULL) { - if (customMem.customFree) - customMem.customFree(customMem.opaque, ptr); - else - free(ptr); - } -} - -} - - -// LICENSE_CHANGE_END - - -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #2 -// See the end of this file for a list - -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - - - -namespace duckdb_apache { -namespace thrift { -namespace protocol { - -TProtocol::~TProtocol() = default; -uint32_t TProtocol::skip_virt(TType type) { - return ::duckdb_apache::thrift::protocol::skip(*this, type); -} - -TProtocolFactory::~TProtocolFactory() = default; - -}}} // duckdb_apache::thrift::protocol - - -// LICENSE_CHANGE_END diff --git a/velox/external/duckdb/parquet-amalgamation.hpp b/velox/external/duckdb/parquet-amalgamation.hpp deleted file mode 100644 index 9b437423317e..000000000000 --- a/velox/external/duckdb/parquet-amalgamation.hpp +++ /dev/null @@ -1,7989 +0,0 @@ -/* -Copyright 2018-2022 Stichting DuckDB Foundation - -Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -*/ - -#pragma once - - -#include "duckdb.hpp" - -namespace duckdb { - -class ParquetExtension : public Extension { -public: - void Load(DuckDB &db) override; - std::string Name() override; -}; - -} // namespace duckdb -//===----------------------------------------------------------------------===// -// DuckDB -// -// parquet_reader.hpp -// -// -//===----------------------------------------------------------------------===// - - - -#include "duckdb.hpp" -#ifndef DUCKDB_AMALGAMATION -#include "duckdb/common/common.hpp" -#include "duckdb/common/exception.hpp" -#include "duckdb/common/string_util.hpp" -#include "duckdb/common/types/data_chunk.hpp" -#endif -//===----------------------------------------------------------------------===// -// DuckDB -// -// column_reader.hpp -// -// -//===----------------------------------------------------------------------===// - - - - - -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #1 -// See the end of this file for a list - -/** - * Autogenerated by Thrift Compiler (0.11.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -#ifndef parquet_TYPES_H -#define parquet_TYPES_H - -#include - - - -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #2 -// See the end of this file for a list - -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -#ifndef _DUCKDB_THRIFT_THRIFT_H_ -#define _DUCKDB_THRIFT_THRIFT_H_ 1 - - - -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #2 -// See the end of this file for a list - -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -// clang-format off - -#ifndef _DUCKDB_THRIFT_TRANSPORT_PLATFORM_SOCKET_H_ -# define _DUCKDB_THRIFT_TRANSPORT_PLATFORM_SOCKET_H_ - -#ifdef _WIN32 -#ifdef _WINSOCKAPI_ -#undef _WINSOCKAPI_ -#endif -# include -# define THRIFT_GET_SOCKET_ERROR ::WSAGetLastError() -# define THRIFT_ERRNO (*_errno()) -# define THRIFT_EINPROGRESS WSAEINPROGRESS -# define THRIFT_EAGAIN WSAEWOULDBLOCK -# define THRIFT_EINTR WSAEINTR -# define THRIFT_ECONNRESET WSAECONNRESET -# define THRIFT_ENOTCONN WSAENOTCONN -# define THRIFT_ETIMEDOUT WSAETIMEDOUT -# define THRIFT_EWOULDBLOCK WSAEWOULDBLOCK -# define THRIFT_EPIPE WSAECONNRESET -# define THRIFT_NO_SOCKET_CACHING SO_EXCLUSIVEADDRUSE -# define THRIFT_SOCKET SOCKET -# define THRIFT_INVALID_SOCKET INVALID_SOCKET -# define THRIFT_SOCKETPAIR thrift_socketpair -# define THRIFT_FCNTL thrift_fcntl -# define THRIFT_O_NONBLOCK 1 -# define THRIFT_F_GETFL 0 -# define THRIFT_F_SETFL 1 -# define THRIFT_GETTIMEOFDAY thrift_gettimeofday -# define THRIFT_CLOSESOCKET closesocket -# define THRIFT_CLOSE _close -# define THRIFT_OPEN _open -# define THRIFT_FTRUNCATE _chsize_s -# define THRIFT_FSYNC _commit -# define THRIFT_LSEEK _lseek -# define THRIFT_WRITE _write -# define THRIFT_READ _read -# define THRIFT_IOCTL_SOCKET ioctlsocket -# define THRIFT_IOCTL_SOCKET_NUM_BYTES_TYPE u_long -# define THRIFT_FSTAT _fstat -# define THRIFT_STAT _stat -# ifdef _WIN32_WCE -# define THRIFT_GAI_STRERROR(...) thrift_wstr2str(gai_strerrorW(__VA_ARGS__)) -# else -# define THRIFT_GAI_STRERROR gai_strerrorA -# endif -# define THRIFT_SSIZET ptrdiff_t -# if (_MSC_VER < 1900) -# define THRIFT_SNPRINTF _snprintf -# else -# define THRIFT_SNPRINTF snprintf -# endif -# define THRIFT_SLEEP_SEC thrift_sleep -# define THRIFT_SLEEP_USEC thrift_usleep -# define THRIFT_TIMESPEC thrift_timespec -# define THRIFT_CTIME_R thrift_ctime_r -# define THRIFT_POLL thrift_poll -# if WINVER <= 0x0502 //XP, Server2003 -# define THRIFT_POLLFD thrift_pollfd -# define THRIFT_POLLIN 0x0300 -# define THRIFT_POLLOUT 0x0010 -# else //Vista, Win7... -# define THRIFT_POLLFD pollfd -# define THRIFT_POLLIN POLLIN -# define THRIFT_POLLOUT POLLOUT -# endif //WINVER -# define THRIFT_SHUT_RDWR SD_BOTH -# if !defined(AI_ADDRCONFIG) -# define AI_ADDRCONFIG 0x00000400 -# endif -#else //not _WIN32 -# include -# define THRIFT_GET_SOCKET_ERROR errno -# define THRIFT_ERRNO errno -# define THRIFT_EINTR EINTR -# define THRIFT_EINPROGRESS EINPROGRESS -# define THRIFT_ECONNRESET ECONNRESET -# define THRIFT_ENOTCONN ENOTCONN -# define THRIFT_ETIMEDOUT ETIMEDOUT -# define THRIFT_EWOULDBLOCK EWOULDBLOCK -# define THRIFT_EAGAIN EAGAIN -# define THRIFT_EPIPE EPIPE -# define THRIFT_NO_SOCKET_CACHING SO_REUSEADDR -# define THRIFT_SOCKET int -# define THRIFT_INVALID_SOCKET (-1) -# define THRIFT_SOCKETPAIR socketpair -# define THRIFT_FCNTL fcntl -# define THRIFT_O_NONBLOCK O_NONBLOCK -# define THRIFT_F_GETFL F_GETFL -# define THRIFT_F_SETFL F_SETFL -# define THRIFT_GETTIMEOFDAY gettimeofday -# define THRIFT_CLOSESOCKET close -# define THRIFT_CLOSE close -# define THRIFT_OPEN open -# define THRIFT_FTRUNCATE ftruncate -# define THRIFT_FSYNC fsync -# define THRIFT_LSEEK lseek -# define THRIFT_WRITE write -# define THRIFT_READ read -# define THRIFT_IOCTL_SOCKET ioctl -# define THRIFT_IOCTL_SOCKET_NUM_BYTES_TYPE int -# define THRIFT_STAT stat -# define THRIFT_FSTAT fstat -# define THRIFT_GAI_STRERROR gai_strerror -# define THRIFT_SSIZET ssize_t -# define THRIFT_SNPRINTF snprintf -# define THRIFT_SLEEP_SEC sleep -# define THRIFT_SLEEP_USEC usleep -# define THRIFT_TIMESPEC timespec -# define THRIFT_CTIME_R ctime_r -# define THRIFT_POLL poll -# define THRIFT_POLLFD pollfd -# define THRIFT_POLLIN POLLIN -# define THRIFT_POLLOUT POLLOUT -# define THRIFT_SHUT_RDWR SHUT_RDWR -#endif - -#endif // _DUCKDB_THRIFT_TRANSPORT_PLATFORM_SOCKET_H_ - - -// LICENSE_CHANGE_END - - - - -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #2 -// See the end of this file for a list - -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -#ifndef THRIFT_CONFIG_H -#define THRIFT_CONFIG_H - - -#ifdef _WIN32 -#if defined(_M_IX86) || defined(_M_X64) -#define ARITHMETIC_RIGHT_SHIFT 1 -#define SIGNED_RIGHT_SHIFT_IS 1 -#endif -#else -#define SIGNED_RIGHT_SHIFT_IS 1 -#define ARITHMETIC_RIGHT_SHIFT 1 -#endif - -#endif - -// LICENSE_CHANGE_END - - -#include -#include - -#include -#ifdef HAVE_NETINET_IN_H -#include -#endif -#ifdef HAVE_INTTYPES_H -#include -#endif -#include -#include -#include -#include -#include -#include -#include - - - -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #2 -// See the end of this file for a list - -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -#ifndef _DUCKDB_THRIFT_TLOGGING_H_ -#define _DUCKDB_THRIFT_TLOGGING_H_ 1 - - - -/** - * Contains utility macros for debugging and logging. - * - */ - -#include - -#ifdef HAVE_STDINT_H -#include -#endif - -//namespace duckdb_apache { -//namespace thrift { -/** - * T_GLOBAL_DEBUGGING_LEVEL = 0: all debugging turned off, debug macros undefined - * T_GLOBAL_DEBUGGING_LEVEL = 1: all debugging turned on - */ -#define T_GLOBAL_DEBUGGING_LEVEL 0 - -/** - * T_GLOBAL_LOGGING_LEVEL = 0: all logging turned off, logging macros undefined - * T_GLOBAL_LOGGING_LEVEL = 1: all logging turned on - */ -#define T_GLOBAL_LOGGING_LEVEL 0 - -/** - * Standard wrapper around fprintf what will prefix the file name and line - * number to the line. Uses T_GLOBAL_DEBUGGING_LEVEL to control whether it is - * turned on or off. - * - * @param format_string - */ -#if T_GLOBAL_DEBUGGING_LEVEL > 0 -#define T_DEBUG(format_string, ...) \ - if (T_GLOBAL_DEBUGGING_LEVEL > 0) { \ - fprintf(stderr, "[%s,%d] " format_string " \n", __FILE__, __LINE__, ##__VA_ARGS__); \ - } -#else -#define T_DEBUG(format_string, ...) -#endif - -/** - * analogous to T_DEBUG but also prints the time - * - * @param string format_string input: printf style format string - */ -#if T_GLOBAL_DEBUGGING_LEVEL > 0 -#define T_DEBUG_T(format_string, ...) \ - { \ - if (T_GLOBAL_DEBUGGING_LEVEL > 0) { \ - time_t now; \ - char dbgtime[26]; \ - time(&now); \ - THRIFT_CTIME_R(&now, dbgtime); \ - dbgtime[24] = '\0'; \ - fprintf(stderr, \ - "[%s,%d] [%s] " format_string " \n", \ - __FILE__, \ - __LINE__, \ - dbgtime, \ - ##__VA_ARGS__); \ - } \ - } -#else -#define T_DEBUG_T(format_string, ...) -#endif - - - - - -/** - * Log input message - * - * @param string format_string input: printf style format string - */ -#if T_GLOBAL_LOGGING_LEVEL > 0 -#define T_LOG_OPER(format_string, ...) \ - { \ - if (T_GLOBAL_LOGGING_LEVEL > 0) { \ - time_t now; \ - char dbgtime[26]; \ - time(&now); \ - THRIFT_CTIME_R(&now, dbgtime); \ - dbgtime[24] = '\0'; \ - fprintf(stderr, "[%s] " format_string " \n", dbgtime, ##__VA_ARGS__); \ - } \ - } -#else -#define T_LOG_OPER(format_string, ...) -#endif - -/** - * T_GLOBAL_DEBUG_VIRTUAL = 0 or unset: normal operation, - * virtual call debug messages disabled - * T_GLOBAL_DEBUG_VIRTUAL = 1: log a debug messages whenever an - * avoidable virtual call is made - * T_GLOBAL_DEBUG_VIRTUAL = 2: record detailed info that can be - * printed by calling - * duckdb_apache::thrift::profile_print_info() - */ -#if T_GLOBAL_DEBUG_VIRTUAL > 1 -#define T_VIRTUAL_CALL() ::duckdb_apache::thrift::profile_virtual_call(typeid(*this)) -#define T_GENERIC_PROTOCOL(template_class, generic_prot, specific_prot) \ - do { \ - if (!(specific_prot)) { \ - ::duckdb_apache::thrift::profile_generic_protocol(typeid(*template_class), typeid(*generic_prot)); \ - } \ - } while (0) -#elif T_GLOBAL_DEBUG_VIRTUAL == 1 -#define T_VIRTUAL_CALL() fprintf(stderr, "[%s,%d] virtual call\n", __FILE__, __LINE__) -#define T_GENERIC_PROTOCOL(template_class, generic_prot, specific_prot) \ - do { \ - if (!(specific_prot)) { \ - fprintf(stderr, "[%s,%d] failed to cast to specific protocol type\n", __FILE__, __LINE__); \ - } \ - } while (0) -#else -#define T_VIRTUAL_CALL() -#define T_GENERIC_PROTOCOL(template_class, generic_prot, specific_prot) -#endif - -//} -//} // end of duckdb_apache::thrift - -#endif // #ifndef _DUCKDB_THRIFT_TLOGGING_H_ - - -// LICENSE_CHANGE_END - -//#include - -#define THRIFT_UNUSED_VARIABLE(x) ((void)(x)) - -namespace duckdb_apache { -namespace thrift { - -class TEnumIterator - : public std::iterator > { -public: - TEnumIterator(int n, int* enums, const char** names) - : ii_(0), n_(n), enums_(enums), names_(names) {} - - int operator++() { return ++ii_; } - - bool operator!=(const TEnumIterator& end) { - THRIFT_UNUSED_VARIABLE(end); - assert(end.n_ == -1); - return (ii_ != n_); - } - - std::pair operator*() const { return std::make_pair(enums_[ii_], names_[ii_]); } - -private: - int ii_; - const int n_; - int* enums_; - const char** names_; -}; - -class TException : public std::exception { -public: - TException() : message_() {} - - TException(const std::string& message) : message_(message) {} - - ~TException() noexcept override = default; - - const char* what() const noexcept override { - if (message_.empty()) { - return "Default TException."; - } else { - return message_.c_str(); - } - } - -protected: - std::string message_; -}; - -class TDelayedException { -public: - template - static TDelayedException* delayException(const E& e); - virtual void throw_it() = 0; - virtual ~TDelayedException() = default; -}; - -template -class TExceptionWrapper : public TDelayedException { -public: - TExceptionWrapper(const E& e) : e_(e) {} - void throw_it() override { - E temp(e_); - delete this; - throw temp; - } - -private: - E e_; -}; - -template -TDelayedException* TDelayedException::delayException(const E& e) { - return new TExceptionWrapper(e); -} - -#if T_GLOBAL_DEBUG_VIRTUAL > 1 -void profile_virtual_call(const std::type_info& info); -void profile_generic_protocol(const std::type_info& template_type, const std::type_info& prot_type); -void profile_print_info(FILE* f); -void profile_print_info(); -void profile_write_pprof(FILE* gen_calls_f, FILE* virtual_calls_f); -#endif -} -} // duckdb_apache::thrift - -#endif // #ifndef _DUCKDB_THRIFT_THRIFT_H_ - - -// LICENSE_CHANGE_END - - - -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #2 -// See the end of this file for a list - -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -#ifndef _DUCKDB_THRIFT_TAPPLICATIONEXCEPTION_H_ -#define _DUCKDB_THRIFT_TAPPLICATIONEXCEPTION_H_ 1 - - - -namespace duckdb_apache { -namespace thrift { - -namespace protocol { -class TProtocol; -} - -class TApplicationException : public TException { -public: - /** - * Error codes for the various types of exceptions. - */ - enum TApplicationExceptionType { - UNKNOWN = 0, - UNKNOWN_METHOD = 1, - INVALID_MESSAGE_TYPE = 2, - WRONG_METHOD_NAME = 3, - BAD_SEQUENCE_ID = 4, - MISSING_RESULT = 5, - INTERNAL_ERROR = 6, - PROTOCOL_ERROR = 7, - INVALID_TRANSFORM = 8, - INVALID_PROTOCOL = 9, - UNSUPPORTED_CLIENT_TYPE = 10 - }; - - TApplicationException() : TException(), type_(UNKNOWN) {} - - TApplicationException(TApplicationExceptionType type) : TException(), type_(type) {} - - TApplicationException(const std::string& message) : TException(message), type_(UNKNOWN) {} - - TApplicationException(TApplicationExceptionType type, const std::string& message) - : TException(message), type_(type) {} - - ~TApplicationException() noexcept override = default; - - /** - * Returns an error code that provides information about the type of error - * that has occurred. - * - * @return Error code - */ - TApplicationExceptionType getType() const { return type_; } - - const char* what() const noexcept override { - if (message_.empty()) { - switch (type_) { - case UNKNOWN: - return "TApplicationException: Unknown application exception"; - case UNKNOWN_METHOD: - return "TApplicationException: Unknown method"; - case INVALID_MESSAGE_TYPE: - return "TApplicationException: Invalid message type"; - case WRONG_METHOD_NAME: - return "TApplicationException: Wrong method name"; - case BAD_SEQUENCE_ID: - return "TApplicationException: Bad sequence identifier"; - case MISSING_RESULT: - return "TApplicationException: Missing result"; - case INTERNAL_ERROR: - return "TApplicationException: Internal error"; - case PROTOCOL_ERROR: - return "TApplicationException: Protocol error"; - case INVALID_TRANSFORM: - return "TApplicationException: Invalid transform"; - case INVALID_PROTOCOL: - return "TApplicationException: Invalid protocol"; - case UNSUPPORTED_CLIENT_TYPE: - return "TApplicationException: Unsupported client type"; - default: - return "TApplicationException: (Invalid exception type)"; - }; - } else { - return message_.c_str(); - } - } - - uint32_t read(protocol::TProtocol* iprot); - uint32_t write(protocol::TProtocol* oprot) const; - -protected: - /** - * Error code - */ - TApplicationExceptionType type_; -}; -} -} // duckdb_apache::thrift - -#endif // #ifndef _DUCKDB_THRIFT_TAPPLICATIONEXCEPTION_H_ - - -// LICENSE_CHANGE_END - - - -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #2 -// See the end of this file for a list - -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -#ifndef _DUCKDB_THRIFT_TBASE_H_ -#define _DUCKDB_THRIFT_TBASE_H_ 1 - - - - -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #2 -// See the end of this file for a list - -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -#ifndef _DUCKDB_THRIFT_PROTOCOL_TPROTOCOL_H_ -#define _DUCKDB_THRIFT_PROTOCOL_TPROTOCOL_H_ 1 - -#ifdef _WIN32 -// Need to come before any Windows.h includes -#include -#endif - - - -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #2 -// See the end of this file for a list - -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -#ifndef _DUCKDB_THRIFT_TRANSPORT_TTRANSPORT_H_ -#define _DUCKDB_THRIFT_TRANSPORT_TTRANSPORT_H_ 1 - - - - -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #2 -// See the end of this file for a list - -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -#ifndef _DUCKDB_THRIFT_TRANSPORT_TTRANSPORTEXCEPTION_H_ -#define _DUCKDB_THRIFT_TRANSPORT_TTRANSPORTEXCEPTION_H_ 1 - -// FUCK OFF #include -#include - - -namespace duckdb_apache { -namespace thrift { -namespace transport { - -/** - * Class to encapsulate all the possible types of transport errors that may - * occur in various transport systems. This provides a sort of generic - * wrapper around the vague UNIX E_ error codes that lets a common code - * base of error handling to be used for various types of transports, i.e. - * pipes etc. - * - */ -class TTransportException : public duckdb_apache::thrift::TException { -public: - /** - * Error codes for the various types of exceptions. - */ - enum TTransportExceptionType { - UNKNOWN = 0, - NOT_OPEN = 1, - TIMED_OUT = 2, - END_OF_FILE = 3, - INTERRUPTED = 4, - BAD_ARGS = 5, - CORRUPTED_DATA = 6, - INTERNAL_ERROR = 7 - }; - - TTransportException() : duckdb_apache::thrift::TException(), type_(UNKNOWN) {} - - TTransportException(TTransportExceptionType type) : duckdb_apache::thrift::TException(), type_(type) {} - - TTransportException(const std::string& message) - : duckdb_apache::thrift::TException(message), type_(UNKNOWN) {} - - TTransportException(TTransportExceptionType type, const std::string& message) - : duckdb_apache::thrift::TException(message), type_(type) {} - - TTransportException(TTransportExceptionType type, const std::string& message, int errno_copy) - : duckdb_apache::thrift::TException(message), type_(type) {} - - ~TTransportException() noexcept override = default; - - /** - * Returns an error code that provides information about the type of error - * that has occurred. - * - * @return Error code - */ - TTransportExceptionType getType() const noexcept { return type_; } - - const char* what() const noexcept override; - -protected: - /** Just like strerror_r but returns a C++ string object. */ - std::string strerror_s(int errno_copy); - - /** Error code */ - TTransportExceptionType type_; -}; - -///** -// * Legacy code in transport implementations have overflow issues -// * that need to be enforced. -// */ -//template To safe_numeric_cast(From i) { -// try { -// return boost::numeric_cast(i); -// } -// catch (const std::bad_cast& bc) { -// throw TTransportException(TTransportException::CORRUPTED_DATA, -// bc.what()); -// } -//} - -} -} -} // duckdb_apache::thrift::transport - -#endif // #ifndef _DUCKDB_THRIFT_TRANSPORT_TTRANSPORTEXCEPTION_H_ - - -// LICENSE_CHANGE_END - -#include -#include - -namespace duckdb_apache { -namespace thrift { -namespace transport { - -/** - * Helper template to hoist readAll implementation out of TTransport - */ -template -uint32_t readAll(Transport_& trans, uint8_t* buf, uint32_t len) { - uint32_t have = 0; - uint32_t get = 0; - - while (have < len) { - get = trans.read(buf + have, len - have); - if (get <= 0) { - throw TTransportException(TTransportException::END_OF_FILE, "No more data to read."); - } - have += get; - } - - return have; -} - -/** - * Generic interface for a method of transporting data. A TTransport may be - * capable of either reading or writing, but not necessarily both. - * - */ -class TTransport { -public: - /** - * Virtual deconstructor. - */ - virtual ~TTransport() = default; - - /** - * Whether this transport is open. - */ - virtual bool isOpen() const { return false; } - - /** - * Tests whether there is more data to read or if the remote side is - * still open. By default this is true whenever the transport is open, - * but implementations should add logic to test for this condition where - * possible (i.e. on a socket). - * This is used by a server to check if it should listen for another - * request. - */ - virtual bool peek() { return isOpen(); } - - /** - * Opens the transport for communications. - * - * @return bool Whether the transport was successfully opened - * @throws TTransportException if opening failed - */ - virtual void open() { - throw TTransportException(TTransportException::NOT_OPEN, "Cannot open base TTransport."); - } - - /** - * Closes the transport. - */ - virtual void close() { - throw TTransportException(TTransportException::NOT_OPEN, "Cannot close base TTransport."); - } - - /** - * Attempt to read up to the specified number of bytes into the string. - * - * @param buf Reference to the location to write the data - * @param len How many bytes to read - * @return How many bytes were actually read - * @throws TTransportException If an error occurs - */ - uint32_t read(uint8_t* buf, uint32_t len) { - T_VIRTUAL_CALL(); - return read_virt(buf, len); - } - virtual uint32_t read_virt(uint8_t* /* buf */, uint32_t /* len */) { - throw TTransportException(TTransportException::NOT_OPEN, "Base TTransport cannot read."); - } - - /** - * Reads the given amount of data in its entirety no matter what. - * - * @param s Reference to location for read data - * @param len How many bytes to read - * @return How many bytes read, which must be equal to size - * @throws TTransportException If insufficient data was read - */ - uint32_t readAll(uint8_t* buf, uint32_t len) { - T_VIRTUAL_CALL(); - return readAll_virt(buf, len); - } - virtual uint32_t readAll_virt(uint8_t* buf, uint32_t len) { - return duckdb_apache::thrift::transport::readAll(*this, buf, len); - } - - /** - * Called when read is completed. - * This can be over-ridden to perform a transport-specific action - * e.g. logging the request to a file - * - * @return number of bytes read if available, 0 otherwise. - */ - virtual uint32_t readEnd() { - // default behaviour is to do nothing - return 0; - } - - /** - * Writes the string in its entirety to the buffer. - * - * Note: You must call flush() to ensure the data is actually written, - * and available to be read back in the future. Destroying a TTransport - * object does not automatically flush pending data--if you destroy a - * TTransport object with written but unflushed data, that data may be - * discarded. - * - * @param buf The data to write out - * @throws TTransportException if an error occurs - */ - void write(const uint8_t* buf, uint32_t len) { - T_VIRTUAL_CALL(); - write_virt(buf, len); - } - virtual void write_virt(const uint8_t* /* buf */, uint32_t /* len */) { - throw TTransportException(TTransportException::NOT_OPEN, "Base TTransport cannot write."); - } - - /** - * Called when write is completed. - * This can be over-ridden to perform a transport-specific action - * at the end of a request. - * - * @return number of bytes written if available, 0 otherwise - */ - virtual uint32_t writeEnd() { - // default behaviour is to do nothing - return 0; - } - - /** - * Flushes any pending data to be written. Typically used with buffered - * transport mechanisms. - * - * @throws TTransportException if an error occurs - */ - virtual void flush() { - // default behaviour is to do nothing - } - - /** - * Attempts to return a pointer to \c len bytes, possibly copied into \c buf. - * Does not consume the bytes read (i.e.: a later read will return the same - * data). This method is meant to support protocols that need to read - * variable-length fields. They can attempt to borrow the maximum amount of - * data that they will need, then consume (see next method) what they - * actually use. Some transports will not support this method and others - * will fail occasionally, so protocols must be prepared to use read if - * borrow fails. - * - * @oaram buf A buffer where the data can be stored if needed. - * If borrow doesn't return buf, then the contents of - * buf after the call are undefined. This parameter may be - * NULL to indicate that the caller is not supplying storage, - * but would like a pointer into an internal buffer, if - * available. - * @param len *len should initially contain the number of bytes to borrow. - * If borrow succeeds, *len will contain the number of bytes - * available in the returned pointer. This will be at least - * what was requested, but may be more if borrow returns - * a pointer to an internal buffer, rather than buf. - * If borrow fails, the contents of *len are undefined. - * @return If the borrow succeeds, return a pointer to the borrowed data. - * This might be equal to \c buf, or it might be a pointer into - * the transport's internal buffers. - * @throws TTransportException if an error occurs - */ - const uint8_t* borrow(uint8_t* buf, uint32_t* len) { - T_VIRTUAL_CALL(); - return borrow_virt(buf, len); - } - virtual const uint8_t* borrow_virt(uint8_t* /* buf */, uint32_t* /* len */) { return nullptr; } - - /** - * Remove len bytes from the transport. This should always follow a borrow - * of at least len bytes, and should always succeed. - * TODO(dreiss): Is there any transport that could borrow but fail to - * consume, or that would require a buffer to dump the consumed data? - * - * @param len How many bytes to consume - * @throws TTransportException If an error occurs - */ - void consume(uint32_t len) { - T_VIRTUAL_CALL(); - consume_virt(len); - } - virtual void consume_virt(uint32_t /* len */) { - throw TTransportException(TTransportException::NOT_OPEN, "Base TTransport cannot consume."); - } - - /** - * Returns the origin of the transports call. The value depends on the - * transport used. An IP based transport for example will return the - * IP address of the client making the request. - * If the transport doesn't know the origin Unknown is returned. - * - * The returned value can be used in a log message for example - */ - virtual const std::string getOrigin() const { return "Unknown"; } - -protected: - /** - * Simple constructor. - */ - TTransport() = default; -}; - -/** - * Generic factory class to make an input and output transport out of a - * source transport. Commonly used inside servers to make input and output - * streams out of raw clients. - * - */ -class TTransportFactory { -public: - TTransportFactory() = default; - - virtual ~TTransportFactory() = default; - - /** - * Default implementation does nothing, just returns the transport given. - */ - virtual std::shared_ptr getTransport(std::shared_ptr trans) { - return trans; - } -}; -} -} -} // duckdb_apache::thrift::transport - -#endif // #ifndef _DUCKDB_THRIFT_TRANSPORT_TTRANSPORT_H_ - - -// LICENSE_CHANGE_END - - - -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #2 -// See the end of this file for a list - -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -#ifndef _DUCKDB_THRIFT_PROTOCOL_TPROTOCOLEXCEPTION_H_ -#define _DUCKDB_THRIFT_PROTOCOL_TPROTOCOLEXCEPTION_H_ 1 - -#include - -namespace duckdb_apache { -namespace thrift { -namespace protocol { - -/** - * Class to encapsulate all the possible types of protocol errors that may - * occur in various protocol systems. This provides a sort of generic - * wrapper around the vague UNIX E_ error codes that lets a common code - * base of error handling to be used for various types of protocols, i.e. - * pipes etc. - * - */ -class TProtocolException : public duckdb_apache::thrift::TException { -public: - /** - * Error codes for the various types of exceptions. - */ - enum TProtocolExceptionType { - UNKNOWN = 0, - INVALID_DATA = 1, - NEGATIVE_SIZE = 2, - SIZE_LIMIT = 3, - BAD_VERSION = 4, - NOT_IMPLEMENTED = 5, - DEPTH_LIMIT = 6 - }; - - TProtocolException() : duckdb_apache::thrift::TException(), type_(UNKNOWN) {} - - TProtocolException(TProtocolExceptionType type) : duckdb_apache::thrift::TException(), type_(type) {} - - TProtocolException(const std::string& message) - : duckdb_apache::thrift::TException(message), type_(UNKNOWN) {} - - TProtocolException(TProtocolExceptionType type, const std::string& message) - : duckdb_apache::thrift::TException(message), type_(type) {} - - ~TProtocolException() noexcept override = default; - - /** - * Returns an error code that provides information about the type of error - * that has occurred. - * - * @return Error code - */ - TProtocolExceptionType getType() const { return type_; } - - const char* what() const noexcept override { - if (message_.empty()) { - switch (type_) { - case UNKNOWN: - return "TProtocolException: Unknown protocol exception"; - case INVALID_DATA: - return "TProtocolException: Invalid data"; - case NEGATIVE_SIZE: - return "TProtocolException: Negative size"; - case SIZE_LIMIT: - return "TProtocolException: Exceeded size limit"; - case BAD_VERSION: - return "TProtocolException: Invalid version"; - case NOT_IMPLEMENTED: - return "TProtocolException: Not implemented"; - default: - return "TProtocolException: (Invalid exception type)"; - } - } else { - return message_.c_str(); - } - } - -protected: - /** - * Error code - */ - TProtocolExceptionType type_; -}; -} -} -} // duckdb_apache::thrift::protocol - -#endif // #ifndef _DUCKDB_THRIFT_PROTOCOL_TPROTOCOLEXCEPTION_H_ - - -// LICENSE_CHANGE_END - - -#include - -#ifdef HAVE_NETINET_IN_H -#include -#endif -#include -#include -#include -#include -#include - -// Use this to get around strict aliasing rules. -// For example, uint64_t i = bitwise_cast(returns_double()); -// The most obvious implementation is to just cast a pointer, -// but that doesn't work. -// For a pretty in-depth explanation of the problem, see -// http://cellperformance.beyond3d.com/articles/2006/06/understanding-strict-aliasing.html -namespace duckdb_apache { namespace thrift { -template -static inline To bitwise_cast(From from) { - static_assert(sizeof(From) == sizeof(To), "sizeof(From) == sizeof(To)"); - - // BAD!!! These are all broken with -O2. - // return *reinterpret_cast(&from); // BAD!!! - // return *static_cast(static_cast(&from)); // BAD!!! - // return *(To*)(void*)&from; // BAD!!! - - // Super clean and paritally blessed by section 3.9 of the standard. - // unsigned char c[sizeof(from)]; - // memcpy(c, &from, sizeof(from)); - // To to; - // memcpy(&to, c, sizeof(c)); - // return to; - - // Slightly more questionable. - // Same code emitted by GCC. - // To to; - // memcpy(&to, &from, sizeof(from)); - // return to; - - // Technically undefined, but almost universally supported, - // and the most efficient implementation. - union { - From f; - To t; - } u; - u.f = from; - return u.t; -} -}} // namespace duckdb_apache::thrift - - -#ifdef HAVE_SYS_PARAM_H -#include -#endif - -#ifndef __THRIFT_BYTE_ORDER -# if defined(BYTE_ORDER) && defined(LITTLE_ENDIAN) && defined(BIG_ENDIAN) -# define __THRIFT_BYTE_ORDER BYTE_ORDER -# define __THRIFT_LITTLE_ENDIAN LITTLE_ENDIAN -# define __THRIFT_BIG_ENDIAN BIG_ENDIAN -# else -//# include -# if BOOST_ENDIAN_BIG_BYTE -# define __THRIFT_BYTE_ORDER 4321 -# define __THRIFT_LITTLE_ENDIAN 0 -# define __THRIFT_BIG_ENDIAN __THRIFT_BYTE_ORDER -# elif BOOST_ENDIAN_LITTLE_BYTE -# define __THRIFT_BYTE_ORDER 1234 -# define __THRIFT_LITTLE_ENDIAN __THRIFT_BYTE_ORDER -# define __THRIFT_BIG_ENDIAN 0 -# endif -# ifdef BOOST_LITTLE_ENDIAN -# else -# endif -# endif -#endif - -#if __THRIFT_BYTE_ORDER == __THRIFT_BIG_ENDIAN -# if !defined(THRIFT_ntohll) -# define THRIFT_ntohll(n) (n) -# define THRIFT_htonll(n) (n) -# endif -# if defined(__GNUC__) && defined(__GLIBC__) -# include -# define THRIFT_htolell(n) bswap_64(n) -# define THRIFT_letohll(n) bswap_64(n) -# define THRIFT_htolel(n) bswap_32(n) -# define THRIFT_letohl(n) bswap_32(n) -# define THRIFT_htoles(n) bswap_16(n) -# define THRIFT_letohs(n) bswap_16(n) -# else /* GNUC & GLIBC */ -# define bswap_64(n) \ - ( (((n) & 0xff00000000000000ull) >> 56) \ - | (((n) & 0x00ff000000000000ull) >> 40) \ - | (((n) & 0x0000ff0000000000ull) >> 24) \ - | (((n) & 0x000000ff00000000ull) >> 8) \ - | (((n) & 0x00000000ff000000ull) << 8) \ - | (((n) & 0x0000000000ff0000ull) << 24) \ - | (((n) & 0x000000000000ff00ull) << 40) \ - | (((n) & 0x00000000000000ffull) << 56) ) -# define bswap_32(n) \ - ( (((n) & 0xff000000ul) >> 24) \ - | (((n) & 0x00ff0000ul) >> 8) \ - | (((n) & 0x0000ff00ul) << 8) \ - | (((n) & 0x000000fful) << 24) ) -# define bswap_16(n) \ - ( (((n) & ((unsigned short)0xff00ul)) >> 8) \ - | (((n) & ((unsigned short)0x00fful)) << 8) ) -# define THRIFT_htolell(n) bswap_64(n) -# define THRIFT_letohll(n) bswap_64(n) -# define THRIFT_htolel(n) bswap_32(n) -# define THRIFT_letohl(n) bswap_32(n) -# define THRIFT_htoles(n) bswap_16(n) -# define THRIFT_letohs(n) bswap_16(n) -# endif /* GNUC & GLIBC */ -#elif __THRIFT_BYTE_ORDER == __THRIFT_LITTLE_ENDIAN -# define THRIFT_htolell(n) (n) -# define THRIFT_letohll(n) (n) -# define THRIFT_htolel(n) (n) -# define THRIFT_letohl(n) (n) -# define THRIFT_htoles(n) (n) -# define THRIFT_letohs(n) (n) -# if defined(__GNUC__) && defined(__GLIBC__) -# include -# define THRIFT_ntohll(n) bswap_64(n) -# define THRIFT_htonll(n) bswap_64(n) -# elif defined(_MSC_VER) /* Microsoft Visual C++ */ -# define THRIFT_ntohll(n) ( _byteswap_uint64((uint64_t)n) ) -# define THRIFT_htonll(n) ( _byteswap_uint64((uint64_t)n) ) -# elif !defined(THRIFT_ntohll) /* Not GNUC/GLIBC or MSVC */ -# define THRIFT_ntohll(n) ( (((uint64_t)ntohl((uint32_t)n)) << 32) + ntohl((uint32_t)(n >> 32)) ) -# define THRIFT_htonll(n) ( (((uint64_t)htonl((uint32_t)n)) << 32) + htonl((uint32_t)(n >> 32)) ) -# endif /* GNUC/GLIBC or MSVC or something else */ -#else /* __THRIFT_BYTE_ORDER */ -# error "Can't define THRIFT_htonll or THRIFT_ntohll!" -#endif - -namespace duckdb_apache { -namespace thrift { -namespace protocol { - -using duckdb_apache::thrift::transport::TTransport; - -/** - * Enumerated definition of the types that the Thrift protocol supports. - * Take special note of the T_END type which is used specifically to mark - * the end of a sequence of fields. - */ -enum TType { - T_STOP = 0, - T_VOID = 1, - T_BOOL = 2, - T_BYTE = 3, - T_I08 = 3, - T_I16 = 6, - T_I32 = 8, - T_U64 = 9, - T_I64 = 10, - T_DOUBLE = 4, - T_STRING = 11, - T_UTF7 = 11, - T_STRUCT = 12, - T_MAP = 13, - T_SET = 14, - T_LIST = 15, - T_UTF8 = 16, - T_UTF16 = 17 -}; - -/** - * Enumerated definition of the message types that the Thrift protocol - * supports. - */ -enum TMessageType { - T_CALL = 1, - T_REPLY = 2, - T_EXCEPTION = 3, - T_ONEWAY = 4 -}; - -static const uint32_t DEFAULT_RECURSION_LIMIT = 64; - -/** - * Abstract class for a thrift protocol driver. These are all the methods that - * a protocol must implement. Essentially, there must be some way of reading - * and writing all the base types, plus a mechanism for writing out structs - * with indexed fields. - * - * TProtocol objects should not be shared across multiple encoding contexts, - * as they may need to maintain internal state in some protocols (i.e. XML). - * Note that is is acceptable for the TProtocol module to do its own internal - * buffered reads/writes to the underlying TTransport where appropriate (i.e. - * when parsing an input XML stream, reading should be batched rather than - * looking ahead character by character for a close tag). - * - */ -class TProtocol { -public: - virtual ~TProtocol(); - - /** - * Writing functions. - */ - - virtual uint32_t writeMessageBegin_virt(const std::string& name, - const TMessageType messageType, - const int32_t seqid) = 0; - - virtual uint32_t writeMessageEnd_virt() = 0; - - virtual uint32_t writeStructBegin_virt(const char* name) = 0; - - virtual uint32_t writeStructEnd_virt() = 0; - - virtual uint32_t writeFieldBegin_virt(const char* name, - const TType fieldType, - const int16_t fieldId) = 0; - - virtual uint32_t writeFieldEnd_virt() = 0; - - virtual uint32_t writeFieldStop_virt() = 0; - - virtual uint32_t writeMapBegin_virt(const TType keyType, const TType valType, const uint32_t size) - = 0; - - virtual uint32_t writeMapEnd_virt() = 0; - - virtual uint32_t writeListBegin_virt(const TType elemType, const uint32_t size) = 0; - - virtual uint32_t writeListEnd_virt() = 0; - - virtual uint32_t writeSetBegin_virt(const TType elemType, const uint32_t size) = 0; - - virtual uint32_t writeSetEnd_virt() = 0; - - virtual uint32_t writeBool_virt(const bool value) = 0; - - virtual uint32_t writeByte_virt(const int8_t byte) = 0; - - virtual uint32_t writeI16_virt(const int16_t i16) = 0; - - virtual uint32_t writeI32_virt(const int32_t i32) = 0; - - virtual uint32_t writeI64_virt(const int64_t i64) = 0; - - virtual uint32_t writeDouble_virt(const double dub) = 0; - - virtual uint32_t writeString_virt(const std::string& str) = 0; - - virtual uint32_t writeBinary_virt(const std::string& str) = 0; - - uint32_t writeMessageBegin(const std::string& name, - const TMessageType messageType, - const int32_t seqid) { - T_VIRTUAL_CALL(); - return writeMessageBegin_virt(name, messageType, seqid); - } - - uint32_t writeMessageEnd() { - T_VIRTUAL_CALL(); - return writeMessageEnd_virt(); - } - - uint32_t writeStructBegin(const char* name) { - T_VIRTUAL_CALL(); - return writeStructBegin_virt(name); - } - - uint32_t writeStructEnd() { - T_VIRTUAL_CALL(); - return writeStructEnd_virt(); - } - - uint32_t writeFieldBegin(const char* name, const TType fieldType, const int16_t fieldId) { - T_VIRTUAL_CALL(); - return writeFieldBegin_virt(name, fieldType, fieldId); - } - - uint32_t writeFieldEnd() { - T_VIRTUAL_CALL(); - return writeFieldEnd_virt(); - } - - uint32_t writeFieldStop() { - T_VIRTUAL_CALL(); - return writeFieldStop_virt(); - } - - uint32_t writeMapBegin(const TType keyType, const TType valType, const uint32_t size) { - T_VIRTUAL_CALL(); - return writeMapBegin_virt(keyType, valType, size); - } - - uint32_t writeMapEnd() { - T_VIRTUAL_CALL(); - return writeMapEnd_virt(); - } - - uint32_t writeListBegin(const TType elemType, const uint32_t size) { - T_VIRTUAL_CALL(); - return writeListBegin_virt(elemType, size); - } - - uint32_t writeListEnd() { - T_VIRTUAL_CALL(); - return writeListEnd_virt(); - } - - uint32_t writeSetBegin(const TType elemType, const uint32_t size) { - T_VIRTUAL_CALL(); - return writeSetBegin_virt(elemType, size); - } - - uint32_t writeSetEnd() { - T_VIRTUAL_CALL(); - return writeSetEnd_virt(); - } - - uint32_t writeBool(const bool value) { - T_VIRTUAL_CALL(); - return writeBool_virt(value); - } - - uint32_t writeByte(const int8_t byte) { - T_VIRTUAL_CALL(); - return writeByte_virt(byte); - } - - uint32_t writeI16(const int16_t i16) { - T_VIRTUAL_CALL(); - return writeI16_virt(i16); - } - - uint32_t writeI32(const int32_t i32) { - T_VIRTUAL_CALL(); - return writeI32_virt(i32); - } - - uint32_t writeI64(const int64_t i64) { - T_VIRTUAL_CALL(); - return writeI64_virt(i64); - } - - uint32_t writeDouble(const double dub) { - T_VIRTUAL_CALL(); - return writeDouble_virt(dub); - } - - uint32_t writeString(const std::string& str) { - T_VIRTUAL_CALL(); - return writeString_virt(str); - } - - uint32_t writeBinary(const std::string& str) { - T_VIRTUAL_CALL(); - return writeBinary_virt(str); - } - - /** - * Reading functions - */ - - virtual uint32_t readMessageBegin_virt(std::string& name, - TMessageType& messageType, - int32_t& seqid) = 0; - - virtual uint32_t readMessageEnd_virt() = 0; - - virtual uint32_t readStructBegin_virt(std::string& name) = 0; - - virtual uint32_t readStructEnd_virt() = 0; - - virtual uint32_t readFieldBegin_virt(std::string& name, TType& fieldType, int16_t& fieldId) = 0; - - virtual uint32_t readFieldEnd_virt() = 0; - - virtual uint32_t readMapBegin_virt(TType& keyType, TType& valType, uint32_t& size) = 0; - - virtual uint32_t readMapEnd_virt() = 0; - - virtual uint32_t readListBegin_virt(TType& elemType, uint32_t& size) = 0; - - virtual uint32_t readListEnd_virt() = 0; - - virtual uint32_t readSetBegin_virt(TType& elemType, uint32_t& size) = 0; - - virtual uint32_t readSetEnd_virt() = 0; - - virtual uint32_t readBool_virt(bool& value) = 0; - - virtual uint32_t readBool_virt(std::vector::reference value) = 0; - - virtual uint32_t readByte_virt(int8_t& byte) = 0; - - virtual uint32_t readI16_virt(int16_t& i16) = 0; - - virtual uint32_t readI32_virt(int32_t& i32) = 0; - - virtual uint32_t readI64_virt(int64_t& i64) = 0; - - virtual uint32_t readDouble_virt(double& dub) = 0; - - virtual uint32_t readString_virt(std::string& str) = 0; - - virtual uint32_t readBinary_virt(std::string& str) = 0; - - uint32_t readMessageBegin(std::string& name, TMessageType& messageType, int32_t& seqid) { - T_VIRTUAL_CALL(); - return readMessageBegin_virt(name, messageType, seqid); - } - - uint32_t readMessageEnd() { - T_VIRTUAL_CALL(); - return readMessageEnd_virt(); - } - - uint32_t readStructBegin(std::string& name) { - T_VIRTUAL_CALL(); - return readStructBegin_virt(name); - } - - uint32_t readStructEnd() { - T_VIRTUAL_CALL(); - return readStructEnd_virt(); - } - - uint32_t readFieldBegin(std::string& name, TType& fieldType, int16_t& fieldId) { - T_VIRTUAL_CALL(); - return readFieldBegin_virt(name, fieldType, fieldId); - } - - uint32_t readFieldEnd() { - T_VIRTUAL_CALL(); - return readFieldEnd_virt(); - } - - uint32_t readMapBegin(TType& keyType, TType& valType, uint32_t& size) { - T_VIRTUAL_CALL(); - return readMapBegin_virt(keyType, valType, size); - } - - uint32_t readMapEnd() { - T_VIRTUAL_CALL(); - return readMapEnd_virt(); - } - - uint32_t readListBegin(TType& elemType, uint32_t& size) { - T_VIRTUAL_CALL(); - return readListBegin_virt(elemType, size); - } - - uint32_t readListEnd() { - T_VIRTUAL_CALL(); - return readListEnd_virt(); - } - - uint32_t readSetBegin(TType& elemType, uint32_t& size) { - T_VIRTUAL_CALL(); - return readSetBegin_virt(elemType, size); - } - - uint32_t readSetEnd() { - T_VIRTUAL_CALL(); - return readSetEnd_virt(); - } - - uint32_t readBool(bool& value) { - T_VIRTUAL_CALL(); - return readBool_virt(value); - } - - uint32_t readByte(int8_t& byte) { - T_VIRTUAL_CALL(); - return readByte_virt(byte); - } - - uint32_t readI16(int16_t& i16) { - T_VIRTUAL_CALL(); - return readI16_virt(i16); - } - - uint32_t readI32(int32_t& i32) { - T_VIRTUAL_CALL(); - return readI32_virt(i32); - } - - uint32_t readI64(int64_t& i64) { - T_VIRTUAL_CALL(); - return readI64_virt(i64); - } - - uint32_t readDouble(double& dub) { - T_VIRTUAL_CALL(); - return readDouble_virt(dub); - } - - uint32_t readString(std::string& str) { - T_VIRTUAL_CALL(); - return readString_virt(str); - } - - uint32_t readBinary(std::string& str) { - T_VIRTUAL_CALL(); - return readBinary_virt(str); - } - - /* - * std::vector is specialized for bool, and its elements are individual bits - * rather than bools. We need to define a different version of readBool() - * to work with std::vector. - */ - uint32_t readBool(std::vector::reference value) { - T_VIRTUAL_CALL(); - return readBool_virt(value); - } - - /** - * Method to arbitrarily skip over data. - */ - uint32_t skip(TType type) { - T_VIRTUAL_CALL(); - return skip_virt(type); - } - virtual uint32_t skip_virt(TType type); - - inline std::shared_ptr getTransport() { return ptrans_; } - - // TODO: remove these two calls, they are for backwards - // compatibility - inline std::shared_ptr getInputTransport() { return ptrans_; } - inline std::shared_ptr getOutputTransport() { return ptrans_; } - - // input and output recursion depth are kept separate so that one protocol - // can be used concurrently for both input and output. - void incrementInputRecursionDepth() { - if (recursion_limit_ < ++input_recursion_depth_) { - throw TProtocolException(TProtocolException::DEPTH_LIMIT); - } - } - void decrementInputRecursionDepth() { --input_recursion_depth_; } - - void incrementOutputRecursionDepth() { - if (recursion_limit_ < ++output_recursion_depth_) { - throw TProtocolException(TProtocolException::DEPTH_LIMIT); - } - } - void decrementOutputRecursionDepth() { --output_recursion_depth_; } - - uint32_t getRecursionLimit() const {return recursion_limit_;} - void setRecurisionLimit(uint32_t depth) {recursion_limit_ = depth;} - -protected: - TProtocol(std::shared_ptr ptrans) - : ptrans_(ptrans), input_recursion_depth_(0), output_recursion_depth_(0), recursion_limit_(DEFAULT_RECURSION_LIMIT) - {} - - std::shared_ptr ptrans_; - -private: - TProtocol() = default; - uint32_t input_recursion_depth_; - uint32_t output_recursion_depth_; - uint32_t recursion_limit_; -}; - -/** - * Constructs input and output protocol objects given transports. - */ -class TProtocolFactory { -public: - TProtocolFactory() = default; - - virtual ~TProtocolFactory(); - - virtual std::shared_ptr getProtocol(std::shared_ptr trans) = 0; - virtual std::shared_ptr getProtocol(std::shared_ptr inTrans, - std::shared_ptr outTrans) { - (void)outTrans; - return getProtocol(inTrans); - } -}; - -/** - * Dummy protocol class. - * - * This class does nothing, and should never be instantiated. - * It is used only by the generator code. - */ -class TDummyProtocol : public TProtocol {}; - - -// HM: this is sub-optimal since it creates a depencency even for memory-only struct -//// This is the default / legacy choice -//struct TNetworkBigEndian -//{ -// static uint16_t toWire16(uint16_t x) {return htons(x);} -// static uint32_t toWire32(uint32_t x) {return htonl(x);} -// static uint64_t toWire64(uint64_t x) {return THRIFT_htonll(x);} -// static uint16_t fromWire16(uint16_t x) {return ntohs(x);} -// static uint32_t fromWire32(uint32_t x) {return ntohl(x);} -// static uint64_t fromWire64(uint64_t x) {return THRIFT_ntohll(x);} -//}; -// -//// On most systems, this will be a bit faster than TNetworkBigEndian -//struct TNetworkLittleEndian -//{ -// static uint16_t toWire16(uint16_t x) {return THRIFT_htoles(x);} -// static uint32_t toWire32(uint32_t x) {return THRIFT_htolel(x);} -// static uint64_t toWire64(uint64_t x) {return THRIFT_htolell(x);} -// static uint16_t fromWire16(uint16_t x) {return THRIFT_letohs(x);} -// static uint32_t fromWire32(uint32_t x) {return THRIFT_letohl(x);} -// static uint64_t fromWire64(uint64_t x) {return THRIFT_letohll(x);} -//}; - -struct TOutputRecursionTracker { - TProtocol &prot_; - TOutputRecursionTracker(TProtocol &prot) : prot_(prot) { - prot_.incrementOutputRecursionDepth(); - } - ~TOutputRecursionTracker() { - prot_.decrementOutputRecursionDepth(); - } -}; - -struct TInputRecursionTracker { - TProtocol &prot_; - TInputRecursionTracker(TProtocol &prot) : prot_(prot) { - prot_.incrementInputRecursionDepth(); - } - ~TInputRecursionTracker() { - prot_.decrementInputRecursionDepth(); - } -}; - -/** - * Helper template for implementing TProtocol::skip(). - * - * Templatized to avoid having to make virtual function calls. - */ -template -uint32_t skip(Protocol_& prot, TType type) { - TInputRecursionTracker tracker(prot); - - switch (type) { - case T_BOOL: { - bool boolv; - return prot.readBool(boolv); - } - case T_BYTE: { - int8_t bytev = 0; - return prot.readByte(bytev); - } - case T_I16: { - int16_t i16; - return prot.readI16(i16); - } - case T_I32: { - int32_t i32; - return prot.readI32(i32); - } - case T_I64: { - int64_t i64; - return prot.readI64(i64); - } - case T_DOUBLE: { - double dub; - return prot.readDouble(dub); - } - case T_STRING: { - std::string str; - return prot.readBinary(str); - } - case T_STRUCT: { - uint32_t result = 0; - std::string name; - int16_t fid; - TType ftype; - result += prot.readStructBegin(name); - while (true) { - result += prot.readFieldBegin(name, ftype, fid); - if (ftype == T_STOP) { - break; - } - result += skip(prot, ftype); - result += prot.readFieldEnd(); - } - result += prot.readStructEnd(); - return result; - } - case T_MAP: { - uint32_t result = 0; - TType keyType; - TType valType; - uint32_t i, size; - result += prot.readMapBegin(keyType, valType, size); - for (i = 0; i < size; i++) { - result += skip(prot, keyType); - result += skip(prot, valType); - } - result += prot.readMapEnd(); - return result; - } - case T_SET: { - uint32_t result = 0; - TType elemType; - uint32_t i, size; - result += prot.readSetBegin(elemType, size); - for (i = 0; i < size; i++) { - result += skip(prot, elemType); - } - result += prot.readSetEnd(); - return result; - } - case T_LIST: { - uint32_t result = 0; - TType elemType; - uint32_t i, size; - result += prot.readListBegin(elemType, size); - for (i = 0; i < size; i++) { - result += skip(prot, elemType); - } - result += prot.readListEnd(); - return result; - } - default: - break; - } - - throw TProtocolException(TProtocolException::INVALID_DATA, - "invalid TType"); -} - -}}} // duckdb_apache::thrift::protocol - -#endif // #define _DUCKDB_THRIFT_PROTOCOL_TPROTOCOL_H_ 1 - - -// LICENSE_CHANGE_END - - -namespace duckdb_apache { -namespace thrift { - -class TBase { -public: - virtual ~TBase() = default; - virtual uint32_t read(protocol::TProtocol* iprot) = 0; - virtual uint32_t write(protocol::TProtocol* oprot) const = 0; -}; -} -} // duckdb_apache::thrift - -#endif // #ifndef _DUCKDB_THRIFT_TBASE_H_ - - -// LICENSE_CHANGE_END - - - - - - -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #2 -// See the end of this file for a list - - - -// LICENSE_CHANGE_END - - - - -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #1 -// See the end of this file for a list - - - -#ifdef _WIN32 -#undef CREATE_NEW -#undef OPTIONAL -#undef Realloc -#undef min -#undef max -#endif - -// LICENSE_CHANGE_END - - -namespace duckdb_parquet { namespace format { - -struct Type { - enum type { - BOOLEAN = 0, - INT32 = 1, - INT64 = 2, - INT96 = 3, - FLOAT = 4, - DOUBLE = 5, - BYTE_ARRAY = 6, - FIXED_LEN_BYTE_ARRAY = 7 - }; -}; - -extern const std::map _Type_VALUES_TO_NAMES; - -std::ostream& operator<<(std::ostream& out, const Type::type& val); - -struct ConvertedType { - enum type { - UTF8 = 0, - MAP = 1, - MAP_KEY_VALUE = 2, - LIST = 3, - ENUM = 4, - DECIMAL = 5, - DATE = 6, - TIME_MILLIS = 7, - TIME_MICROS = 8, - TIMESTAMP_MILLIS = 9, - TIMESTAMP_MICROS = 10, - UINT_8 = 11, - UINT_16 = 12, - UINT_32 = 13, - UINT_64 = 14, - INT_8 = 15, - INT_16 = 16, - INT_32 = 17, - INT_64 = 18, - JSON = 19, - BSON = 20, - INTERVAL = 21 - }; -}; - -extern const std::map _ConvertedType_VALUES_TO_NAMES; - -std::ostream& operator<<(std::ostream& out, const ConvertedType::type& val); - -struct FieldRepetitionType { - enum type { - REQUIRED = 0, - OPTIONAL = 1, - REPEATED = 2 - }; -}; - -extern const std::map _FieldRepetitionType_VALUES_TO_NAMES; - -std::ostream& operator<<(std::ostream& out, const FieldRepetitionType::type& val); - -struct Encoding { - enum type { - PLAIN = 0, - PLAIN_DICTIONARY = 2, - RLE = 3, - BIT_PACKED = 4, - DELTA_BINARY_PACKED = 5, - DELTA_LENGTH_BYTE_ARRAY = 6, - DELTA_BYTE_ARRAY = 7, - RLE_DICTIONARY = 8 - }; -}; - -extern const std::map _Encoding_VALUES_TO_NAMES; - -std::ostream& operator<<(std::ostream& out, const Encoding::type& val); - -struct CompressionCodec { - enum type { - UNCOMPRESSED = 0, - SNAPPY = 1, - GZIP = 2, - LZO = 3, - BROTLI = 4, - LZ4 = 5, - ZSTD = 6 - }; -}; - -extern const std::map _CompressionCodec_VALUES_TO_NAMES; - -std::ostream& operator<<(std::ostream& out, const CompressionCodec::type& val); - -struct PageType { - enum type { - DATA_PAGE = 0, - INDEX_PAGE = 1, - DICTIONARY_PAGE = 2, - DATA_PAGE_V2 = 3 - }; -}; - -extern const std::map _PageType_VALUES_TO_NAMES; - -std::ostream& operator<<(std::ostream& out, const PageType::type& val); - -struct BoundaryOrder { - enum type { - UNORDERED = 0, - ASCENDING = 1, - DESCENDING = 2 - }; -}; - -extern const std::map _BoundaryOrder_VALUES_TO_NAMES; - -std::ostream& operator<<(std::ostream& out, const BoundaryOrder::type& val); - -class Statistics; - -class StringType; - -class UUIDType; - -class MapType; - -class ListType; - -class EnumType; - -class DateType; - -class NullType; - -class DecimalType; - -class MilliSeconds; - -class MicroSeconds; - -class NanoSeconds; - -class TimeUnit; - -class TimestampType; - -class TimeType; - -class IntType; - -class JsonType; - -class BsonType; - -class LogicalType; - -class SchemaElement; - -class DataPageHeader; - -class IndexPageHeader; - -class DictionaryPageHeader; - -class DataPageHeaderV2; - -class PageHeader; - -class KeyValue; - -class SortingColumn; - -class PageEncodingStats; - -class ColumnMetaData; - -class EncryptionWithFooterKey; - -class EncryptionWithColumnKey; - -class ColumnCryptoMetaData; - -class ColumnChunk; - -class RowGroup; - -class TypeDefinedOrder; - -class ColumnOrder; - -class PageLocation; - -class OffsetIndex; - -class ColumnIndex; - -class AesGcmV1; - -class AesGcmCtrV1; - -class EncryptionAlgorithm; - -class FileMetaData; - -class FileCryptoMetaData; - -typedef struct _Statistics__isset { - _Statistics__isset() : max(false), min(false), null_count(false), distinct_count(false), max_value(false), min_value(false) {} - bool max :1; - bool min :1; - bool null_count :1; - bool distinct_count :1; - bool max_value :1; - bool min_value :1; -} _Statistics__isset; - -class Statistics : public virtual ::duckdb_apache::thrift::TBase { - public: - - Statistics(const Statistics&); - Statistics& operator=(const Statistics&); - Statistics() : max(), min(), null_count(0), distinct_count(0), max_value(), min_value() { - } - - virtual ~Statistics() throw(); - std::string max; - std::string min; - int64_t null_count; - int64_t distinct_count; - std::string max_value; - std::string min_value; - - _Statistics__isset __isset; - - void __set_max(const std::string& val); - - void __set_min(const std::string& val); - - void __set_null_count(const int64_t val); - - void __set_distinct_count(const int64_t val); - - void __set_max_value(const std::string& val); - - void __set_min_value(const std::string& val); - - bool operator == (const Statistics & rhs) const - { - if (__isset.max != rhs.__isset.max) - return false; - else if (__isset.max && !(max == rhs.max)) - return false; - if (__isset.min != rhs.__isset.min) - return false; - else if (__isset.min && !(min == rhs.min)) - return false; - if (__isset.null_count != rhs.__isset.null_count) - return false; - else if (__isset.null_count && !(null_count == rhs.null_count)) - return false; - if (__isset.distinct_count != rhs.__isset.distinct_count) - return false; - else if (__isset.distinct_count && !(distinct_count == rhs.distinct_count)) - return false; - if (__isset.max_value != rhs.__isset.max_value) - return false; - else if (__isset.max_value && !(max_value == rhs.max_value)) - return false; - if (__isset.min_value != rhs.__isset.min_value) - return false; - else if (__isset.min_value && !(min_value == rhs.min_value)) - return false; - return true; - } - bool operator != (const Statistics &rhs) const { - return !(*this == rhs); - } - - bool operator < (const Statistics & ) const; - - uint32_t read(::duckdb_apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const; - - virtual void printTo(std::ostream& out) const; -}; - -void swap(Statistics &a, Statistics &b); - -std::ostream& operator<<(std::ostream& out, const Statistics& obj); - - -class StringType : public virtual ::duckdb_apache::thrift::TBase { - public: - - StringType(const StringType&); - StringType& operator=(const StringType&); - StringType() { - } - - virtual ~StringType() throw(); - - bool operator == (const StringType & /* rhs */) const - { - return true; - } - bool operator != (const StringType &rhs) const { - return !(*this == rhs); - } - - bool operator < (const StringType & ) const; - - uint32_t read(::duckdb_apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const; - - virtual void printTo(std::ostream& out) const; -}; - -void swap(StringType &a, StringType &b); - -std::ostream& operator<<(std::ostream& out, const StringType& obj); - - -class UUIDType : public virtual ::duckdb_apache::thrift::TBase { - public: - - UUIDType(const UUIDType&); - UUIDType& operator=(const UUIDType&); - UUIDType() { - } - - virtual ~UUIDType() throw(); - - bool operator == (const UUIDType & /* rhs */) const - { - return true; - } - bool operator != (const UUIDType &rhs) const { - return !(*this == rhs); - } - - bool operator < (const UUIDType & ) const; - - uint32_t read(::duckdb_apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const; - - virtual void printTo(std::ostream& out) const; -}; - -void swap(UUIDType &a, UUIDType &b); - -std::ostream& operator<<(std::ostream& out, const UUIDType& obj); - - -class MapType : public virtual ::duckdb_apache::thrift::TBase { - public: - - MapType(const MapType&); - MapType& operator=(const MapType&); - MapType() { - } - - virtual ~MapType() throw(); - - bool operator == (const MapType & /* rhs */) const - { - return true; - } - bool operator != (const MapType &rhs) const { - return !(*this == rhs); - } - - bool operator < (const MapType & ) const; - - uint32_t read(::duckdb_apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const; - - virtual void printTo(std::ostream& out) const; -}; - -void swap(MapType &a, MapType &b); - -std::ostream& operator<<(std::ostream& out, const MapType& obj); - - -class ListType : public virtual ::duckdb_apache::thrift::TBase { - public: - - ListType(const ListType&); - ListType& operator=(const ListType&); - ListType() { - } - - virtual ~ListType() throw(); - - bool operator == (const ListType & /* rhs */) const - { - return true; - } - bool operator != (const ListType &rhs) const { - return !(*this == rhs); - } - - bool operator < (const ListType & ) const; - - uint32_t read(::duckdb_apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const; - - virtual void printTo(std::ostream& out) const; -}; - -void swap(ListType &a, ListType &b); - -std::ostream& operator<<(std::ostream& out, const ListType& obj); - - -class EnumType : public virtual ::duckdb_apache::thrift::TBase { - public: - - EnumType(const EnumType&); - EnumType& operator=(const EnumType&); - EnumType() { - } - - virtual ~EnumType() throw(); - - bool operator == (const EnumType & /* rhs */) const - { - return true; - } - bool operator != (const EnumType &rhs) const { - return !(*this == rhs); - } - - bool operator < (const EnumType & ) const; - - uint32_t read(::duckdb_apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const; - - virtual void printTo(std::ostream& out) const; -}; - -void swap(EnumType &a, EnumType &b); - -std::ostream& operator<<(std::ostream& out, const EnumType& obj); - - -class DateType : public virtual ::duckdb_apache::thrift::TBase { - public: - - DateType(const DateType&); - DateType& operator=(const DateType&); - DateType() { - } - - virtual ~DateType() throw(); - - bool operator == (const DateType & /* rhs */) const - { - return true; - } - bool operator != (const DateType &rhs) const { - return !(*this == rhs); - } - - bool operator < (const DateType & ) const; - - uint32_t read(::duckdb_apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const; - - virtual void printTo(std::ostream& out) const; -}; - -void swap(DateType &a, DateType &b); - -std::ostream& operator<<(std::ostream& out, const DateType& obj); - - -class NullType : public virtual ::duckdb_apache::thrift::TBase { - public: - - NullType(const NullType&); - NullType& operator=(const NullType&); - NullType() { - } - - virtual ~NullType() throw(); - - bool operator == (const NullType & /* rhs */) const - { - return true; - } - bool operator != (const NullType &rhs) const { - return !(*this == rhs); - } - - bool operator < (const NullType & ) const; - - uint32_t read(::duckdb_apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const; - - virtual void printTo(std::ostream& out) const; -}; - -void swap(NullType &a, NullType &b); - -std::ostream& operator<<(std::ostream& out, const NullType& obj); - - -class DecimalType : public virtual ::duckdb_apache::thrift::TBase { - public: - - DecimalType(const DecimalType&); - DecimalType& operator=(const DecimalType&); - DecimalType() : scale(0), precision(0) { - } - - virtual ~DecimalType() throw(); - int32_t scale; - int32_t precision; - - void __set_scale(const int32_t val); - - void __set_precision(const int32_t val); - - bool operator == (const DecimalType & rhs) const - { - if (!(scale == rhs.scale)) - return false; - if (!(precision == rhs.precision)) - return false; - return true; - } - bool operator != (const DecimalType &rhs) const { - return !(*this == rhs); - } - - bool operator < (const DecimalType & ) const; - - uint32_t read(::duckdb_apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const; - - virtual void printTo(std::ostream& out) const; -}; - -void swap(DecimalType &a, DecimalType &b); - -std::ostream& operator<<(std::ostream& out, const DecimalType& obj); - - -class MilliSeconds : public virtual ::duckdb_apache::thrift::TBase { - public: - - MilliSeconds(const MilliSeconds&); - MilliSeconds& operator=(const MilliSeconds&); - MilliSeconds() { - } - - virtual ~MilliSeconds() throw(); - - bool operator == (const MilliSeconds & /* rhs */) const - { - return true; - } - bool operator != (const MilliSeconds &rhs) const { - return !(*this == rhs); - } - - bool operator < (const MilliSeconds & ) const; - - uint32_t read(::duckdb_apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const; - - virtual void printTo(std::ostream& out) const; -}; - -void swap(MilliSeconds &a, MilliSeconds &b); - -std::ostream& operator<<(std::ostream& out, const MilliSeconds& obj); - - -class MicroSeconds : public virtual ::duckdb_apache::thrift::TBase { - public: - - MicroSeconds(const MicroSeconds&); - MicroSeconds& operator=(const MicroSeconds&); - MicroSeconds() { - } - - virtual ~MicroSeconds() throw(); - - bool operator == (const MicroSeconds & /* rhs */) const - { - return true; - } - bool operator != (const MicroSeconds &rhs) const { - return !(*this == rhs); - } - - bool operator < (const MicroSeconds & ) const; - - uint32_t read(::duckdb_apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const; - - virtual void printTo(std::ostream& out) const; -}; - -void swap(MicroSeconds &a, MicroSeconds &b); - -std::ostream& operator<<(std::ostream& out, const MicroSeconds& obj); - - -class NanoSeconds : public virtual ::duckdb_apache::thrift::TBase { - public: - - NanoSeconds(const NanoSeconds&); - NanoSeconds& operator=(const NanoSeconds&); - NanoSeconds() { - } - - virtual ~NanoSeconds() throw(); - - bool operator == (const NanoSeconds & /* rhs */) const - { - return true; - } - bool operator != (const NanoSeconds &rhs) const { - return !(*this == rhs); - } - - bool operator < (const NanoSeconds & ) const; - - uint32_t read(::duckdb_apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const; - - virtual void printTo(std::ostream& out) const; -}; - -void swap(NanoSeconds &a, NanoSeconds &b); - -std::ostream& operator<<(std::ostream& out, const NanoSeconds& obj); - -typedef struct _TimeUnit__isset { - _TimeUnit__isset() : MILLIS(false), MICROS(false), NANOS(false) {} - bool MILLIS :1; - bool MICROS :1; - bool NANOS :1; -} _TimeUnit__isset; - -class TimeUnit : public virtual ::duckdb_apache::thrift::TBase { - public: - - TimeUnit(const TimeUnit&); - TimeUnit& operator=(const TimeUnit&); - TimeUnit() { - } - - virtual ~TimeUnit() throw(); - MilliSeconds MILLIS; - MicroSeconds MICROS; - NanoSeconds NANOS; - - _TimeUnit__isset __isset; - - void __set_MILLIS(const MilliSeconds& val); - - void __set_MICROS(const MicroSeconds& val); - - void __set_NANOS(const NanoSeconds& val); - - bool operator == (const TimeUnit & rhs) const - { - if (__isset.MILLIS != rhs.__isset.MILLIS) - return false; - else if (__isset.MILLIS && !(MILLIS == rhs.MILLIS)) - return false; - if (__isset.MICROS != rhs.__isset.MICROS) - return false; - else if (__isset.MICROS && !(MICROS == rhs.MICROS)) - return false; - if (__isset.NANOS != rhs.__isset.NANOS) - return false; - else if (__isset.NANOS && !(NANOS == rhs.NANOS)) - return false; - return true; - } - bool operator != (const TimeUnit &rhs) const { - return !(*this == rhs); - } - - bool operator < (const TimeUnit & ) const; - - uint32_t read(::duckdb_apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const; - - virtual void printTo(std::ostream& out) const; -}; - -void swap(TimeUnit &a, TimeUnit &b); - -std::ostream& operator<<(std::ostream& out, const TimeUnit& obj); - - -class TimestampType : public virtual ::duckdb_apache::thrift::TBase { - public: - - TimestampType(const TimestampType&); - TimestampType& operator=(const TimestampType&); - TimestampType() : isAdjustedToUTC(0) { - } - - virtual ~TimestampType() throw(); - bool isAdjustedToUTC; - TimeUnit unit; - - void __set_isAdjustedToUTC(const bool val); - - void __set_unit(const TimeUnit& val); - - bool operator == (const TimestampType & rhs) const - { - if (!(isAdjustedToUTC == rhs.isAdjustedToUTC)) - return false; - if (!(unit == rhs.unit)) - return false; - return true; - } - bool operator != (const TimestampType &rhs) const { - return !(*this == rhs); - } - - bool operator < (const TimestampType & ) const; - - uint32_t read(::duckdb_apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const; - - virtual void printTo(std::ostream& out) const; -}; - -void swap(TimestampType &a, TimestampType &b); - -std::ostream& operator<<(std::ostream& out, const TimestampType& obj); - - -class TimeType : public virtual ::duckdb_apache::thrift::TBase { - public: - - TimeType(const TimeType&); - TimeType& operator=(const TimeType&); - TimeType() : isAdjustedToUTC(0) { - } - - virtual ~TimeType() throw(); - bool isAdjustedToUTC; - TimeUnit unit; - - void __set_isAdjustedToUTC(const bool val); - - void __set_unit(const TimeUnit& val); - - bool operator == (const TimeType & rhs) const - { - if (!(isAdjustedToUTC == rhs.isAdjustedToUTC)) - return false; - if (!(unit == rhs.unit)) - return false; - return true; - } - bool operator != (const TimeType &rhs) const { - return !(*this == rhs); - } - - bool operator < (const TimeType & ) const; - - uint32_t read(::duckdb_apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const; - - virtual void printTo(std::ostream& out) const; -}; - -void swap(TimeType &a, TimeType &b); - -std::ostream& operator<<(std::ostream& out, const TimeType& obj); - - -class IntType : public virtual ::duckdb_apache::thrift::TBase { - public: - - IntType(const IntType&); - IntType& operator=(const IntType&); - IntType() : bitWidth(0), isSigned(0) { - } - - virtual ~IntType() throw(); - int8_t bitWidth; - bool isSigned; - - void __set_bitWidth(const int8_t val); - - void __set_isSigned(const bool val); - - bool operator == (const IntType & rhs) const - { - if (!(bitWidth == rhs.bitWidth)) - return false; - if (!(isSigned == rhs.isSigned)) - return false; - return true; - } - bool operator != (const IntType &rhs) const { - return !(*this == rhs); - } - - bool operator < (const IntType & ) const; - - uint32_t read(::duckdb_apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const; - - virtual void printTo(std::ostream& out) const; -}; - -void swap(IntType &a, IntType &b); - -std::ostream& operator<<(std::ostream& out, const IntType& obj); - - -class JsonType : public virtual ::duckdb_apache::thrift::TBase { - public: - - JsonType(const JsonType&); - JsonType& operator=(const JsonType&); - JsonType() { - } - - virtual ~JsonType() throw(); - - bool operator == (const JsonType & /* rhs */) const - { - return true; - } - bool operator != (const JsonType &rhs) const { - return !(*this == rhs); - } - - bool operator < (const JsonType & ) const; - - uint32_t read(::duckdb_apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const; - - virtual void printTo(std::ostream& out) const; -}; - -void swap(JsonType &a, JsonType &b); - -std::ostream& operator<<(std::ostream& out, const JsonType& obj); - - -class BsonType : public virtual ::duckdb_apache::thrift::TBase { - public: - - BsonType(const BsonType&); - BsonType& operator=(const BsonType&); - BsonType() { - } - - virtual ~BsonType() throw(); - - bool operator == (const BsonType & /* rhs */) const - { - return true; - } - bool operator != (const BsonType &rhs) const { - return !(*this == rhs); - } - - bool operator < (const BsonType & ) const; - - uint32_t read(::duckdb_apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const; - - virtual void printTo(std::ostream& out) const; -}; - -void swap(BsonType &a, BsonType &b); - -std::ostream& operator<<(std::ostream& out, const BsonType& obj); - -typedef struct _LogicalType__isset { - _LogicalType__isset() : STRING(false), MAP(false), LIST(false), ENUM(false), DECIMAL(false), DATE(false), TIME(false), TIMESTAMP(false), INTEGER(false), UNKNOWN(false), JSON(false), BSON(false), UUID(false) {} - bool STRING :1; - bool MAP :1; - bool LIST :1; - bool ENUM :1; - bool DECIMAL :1; - bool DATE :1; - bool TIME :1; - bool TIMESTAMP :1; - bool INTEGER :1; - bool UNKNOWN :1; - bool JSON :1; - bool BSON :1; - bool UUID :1; -} _LogicalType__isset; - -class LogicalType : public virtual ::duckdb_apache::thrift::TBase { - public: - - LogicalType(const LogicalType&); - LogicalType& operator=(const LogicalType&); - LogicalType() { - } - - virtual ~LogicalType() throw(); - StringType STRING; - MapType MAP; - ListType LIST; - EnumType ENUM; - DecimalType DECIMAL; - DateType DATE; - TimeType TIME; - TimestampType TIMESTAMP; - IntType INTEGER; - NullType UNKNOWN; - JsonType JSON; - BsonType BSON; - UUIDType UUID; - - _LogicalType__isset __isset; - - void __set_STRING(const StringType& val); - - void __set_MAP(const MapType& val); - - void __set_LIST(const ListType& val); - - void __set_ENUM(const EnumType& val); - - void __set_DECIMAL(const DecimalType& val); - - void __set_DATE(const DateType& val); - - void __set_TIME(const TimeType& val); - - void __set_TIMESTAMP(const TimestampType& val); - - void __set_INTEGER(const IntType& val); - - void __set_UNKNOWN(const NullType& val); - - void __set_JSON(const JsonType& val); - - void __set_BSON(const BsonType& val); - - void __set_UUID(const UUIDType& val); - - bool operator == (const LogicalType & rhs) const - { - if (__isset.STRING != rhs.__isset.STRING) - return false; - else if (__isset.STRING && !(STRING == rhs.STRING)) - return false; - if (__isset.MAP != rhs.__isset.MAP) - return false; - else if (__isset.MAP && !(MAP == rhs.MAP)) - return false; - if (__isset.LIST != rhs.__isset.LIST) - return false; - else if (__isset.LIST && !(LIST == rhs.LIST)) - return false; - if (__isset.ENUM != rhs.__isset.ENUM) - return false; - else if (__isset.ENUM && !(ENUM == rhs.ENUM)) - return false; - if (__isset.DECIMAL != rhs.__isset.DECIMAL) - return false; - else if (__isset.DECIMAL && !(DECIMAL == rhs.DECIMAL)) - return false; - if (__isset.DATE != rhs.__isset.DATE) - return false; - else if (__isset.DATE && !(DATE == rhs.DATE)) - return false; - if (__isset.TIME != rhs.__isset.TIME) - return false; - else if (__isset.TIME && !(TIME == rhs.TIME)) - return false; - if (__isset.TIMESTAMP != rhs.__isset.TIMESTAMP) - return false; - else if (__isset.TIMESTAMP && !(TIMESTAMP == rhs.TIMESTAMP)) - return false; - if (__isset.INTEGER != rhs.__isset.INTEGER) - return false; - else if (__isset.INTEGER && !(INTEGER == rhs.INTEGER)) - return false; - if (__isset.UNKNOWN != rhs.__isset.UNKNOWN) - return false; - else if (__isset.UNKNOWN && !(UNKNOWN == rhs.UNKNOWN)) - return false; - if (__isset.JSON != rhs.__isset.JSON) - return false; - else if (__isset.JSON && !(JSON == rhs.JSON)) - return false; - if (__isset.BSON != rhs.__isset.BSON) - return false; - else if (__isset.BSON && !(BSON == rhs.BSON)) - return false; - if (__isset.UUID != rhs.__isset.UUID) - return false; - else if (__isset.UUID && !(UUID == rhs.UUID)) - return false; - return true; - } - bool operator != (const LogicalType &rhs) const { - return !(*this == rhs); - } - - bool operator < (const LogicalType & ) const; - - uint32_t read(::duckdb_apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const; - - virtual void printTo(std::ostream& out) const; -}; - -void swap(LogicalType &a, LogicalType &b); - -std::ostream& operator<<(std::ostream& out, const LogicalType& obj); - -typedef struct _SchemaElement__isset { - _SchemaElement__isset() : type(false), type_length(false), repetition_type(false), num_children(false), converted_type(false), scale(false), precision(false), field_id(false), logicalType(false) {} - bool type :1; - bool type_length :1; - bool repetition_type :1; - bool num_children :1; - bool converted_type :1; - bool scale :1; - bool precision :1; - bool field_id :1; - bool logicalType :1; -} _SchemaElement__isset; - -class SchemaElement : public virtual ::duckdb_apache::thrift::TBase { - public: - - SchemaElement(const SchemaElement&); - SchemaElement& operator=(const SchemaElement&); - SchemaElement() : type((Type::type)0), type_length(0), repetition_type((FieldRepetitionType::type)0), name(), num_children(0), converted_type((ConvertedType::type)0), scale(0), precision(0), field_id(0) { - } - - virtual ~SchemaElement() throw(); - Type::type type; - int32_t type_length; - FieldRepetitionType::type repetition_type; - std::string name; - int32_t num_children; - ConvertedType::type converted_type; - int32_t scale; - int32_t precision; - int32_t field_id; - LogicalType logicalType; - - _SchemaElement__isset __isset; - - void __set_type(const Type::type val); - - void __set_type_length(const int32_t val); - - void __set_repetition_type(const FieldRepetitionType::type val); - - void __set_name(const std::string& val); - - void __set_num_children(const int32_t val); - - void __set_converted_type(const ConvertedType::type val); - - void __set_scale(const int32_t val); - - void __set_precision(const int32_t val); - - void __set_field_id(const int32_t val); - - void __set_logicalType(const LogicalType& val); - - bool operator == (const SchemaElement & rhs) const - { - if (__isset.type != rhs.__isset.type) - return false; - else if (__isset.type && !(type == rhs.type)) - return false; - if (__isset.type_length != rhs.__isset.type_length) - return false; - else if (__isset.type_length && !(type_length == rhs.type_length)) - return false; - if (__isset.repetition_type != rhs.__isset.repetition_type) - return false; - else if (__isset.repetition_type && !(repetition_type == rhs.repetition_type)) - return false; - if (!(name == rhs.name)) - return false; - if (__isset.num_children != rhs.__isset.num_children) - return false; - else if (__isset.num_children && !(num_children == rhs.num_children)) - return false; - if (__isset.converted_type != rhs.__isset.converted_type) - return false; - else if (__isset.converted_type && !(converted_type == rhs.converted_type)) - return false; - if (__isset.scale != rhs.__isset.scale) - return false; - else if (__isset.scale && !(scale == rhs.scale)) - return false; - if (__isset.precision != rhs.__isset.precision) - return false; - else if (__isset.precision && !(precision == rhs.precision)) - return false; - if (__isset.field_id != rhs.__isset.field_id) - return false; - else if (__isset.field_id && !(field_id == rhs.field_id)) - return false; - if (__isset.logicalType != rhs.__isset.logicalType) - return false; - else if (__isset.logicalType && !(logicalType == rhs.logicalType)) - return false; - return true; - } - bool operator != (const SchemaElement &rhs) const { - return !(*this == rhs); - } - - bool operator < (const SchemaElement & ) const; - - uint32_t read(::duckdb_apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const; - - virtual void printTo(std::ostream& out) const; -}; - -void swap(SchemaElement &a, SchemaElement &b); - -std::ostream& operator<<(std::ostream& out, const SchemaElement& obj); - -typedef struct _DataPageHeader__isset { - _DataPageHeader__isset() : statistics(false) {} - bool statistics :1; -} _DataPageHeader__isset; - -class DataPageHeader : public virtual ::duckdb_apache::thrift::TBase { - public: - - DataPageHeader(const DataPageHeader&); - DataPageHeader& operator=(const DataPageHeader&); - DataPageHeader() : num_values(0), encoding((Encoding::type)0), definition_level_encoding((Encoding::type)0), repetition_level_encoding((Encoding::type)0) { - } - - virtual ~DataPageHeader() throw(); - int32_t num_values; - Encoding::type encoding; - Encoding::type definition_level_encoding; - Encoding::type repetition_level_encoding; - Statistics statistics; - - _DataPageHeader__isset __isset; - - void __set_num_values(const int32_t val); - - void __set_encoding(const Encoding::type val); - - void __set_definition_level_encoding(const Encoding::type val); - - void __set_repetition_level_encoding(const Encoding::type val); - - void __set_statistics(const Statistics& val); - - bool operator == (const DataPageHeader & rhs) const - { - if (!(num_values == rhs.num_values)) - return false; - if (!(encoding == rhs.encoding)) - return false; - if (!(definition_level_encoding == rhs.definition_level_encoding)) - return false; - if (!(repetition_level_encoding == rhs.repetition_level_encoding)) - return false; - if (__isset.statistics != rhs.__isset.statistics) - return false; - else if (__isset.statistics && !(statistics == rhs.statistics)) - return false; - return true; - } - bool operator != (const DataPageHeader &rhs) const { - return !(*this == rhs); - } - - bool operator < (const DataPageHeader & ) const; - - uint32_t read(::duckdb_apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const; - - virtual void printTo(std::ostream& out) const; -}; - -void swap(DataPageHeader &a, DataPageHeader &b); - -std::ostream& operator<<(std::ostream& out, const DataPageHeader& obj); - - -class IndexPageHeader : public virtual ::duckdb_apache::thrift::TBase { - public: - - IndexPageHeader(const IndexPageHeader&); - IndexPageHeader& operator=(const IndexPageHeader&); - IndexPageHeader() { - } - - virtual ~IndexPageHeader() throw(); - - bool operator == (const IndexPageHeader & /* rhs */) const - { - return true; - } - bool operator != (const IndexPageHeader &rhs) const { - return !(*this == rhs); - } - - bool operator < (const IndexPageHeader & ) const; - - uint32_t read(::duckdb_apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const; - - virtual void printTo(std::ostream& out) const; -}; - -void swap(IndexPageHeader &a, IndexPageHeader &b); - -std::ostream& operator<<(std::ostream& out, const IndexPageHeader& obj); - -typedef struct _DictionaryPageHeader__isset { - _DictionaryPageHeader__isset() : is_sorted(false) {} - bool is_sorted :1; -} _DictionaryPageHeader__isset; - -class DictionaryPageHeader : public virtual ::duckdb_apache::thrift::TBase { - public: - - DictionaryPageHeader(const DictionaryPageHeader&); - DictionaryPageHeader& operator=(const DictionaryPageHeader&); - DictionaryPageHeader() : num_values(0), encoding((Encoding::type)0), is_sorted(0) { - } - - virtual ~DictionaryPageHeader() throw(); - int32_t num_values; - Encoding::type encoding; - bool is_sorted; - - _DictionaryPageHeader__isset __isset; - - void __set_num_values(const int32_t val); - - void __set_encoding(const Encoding::type val); - - void __set_is_sorted(const bool val); - - bool operator == (const DictionaryPageHeader & rhs) const - { - if (!(num_values == rhs.num_values)) - return false; - if (!(encoding == rhs.encoding)) - return false; - if (__isset.is_sorted != rhs.__isset.is_sorted) - return false; - else if (__isset.is_sorted && !(is_sorted == rhs.is_sorted)) - return false; - return true; - } - bool operator != (const DictionaryPageHeader &rhs) const { - return !(*this == rhs); - } - - bool operator < (const DictionaryPageHeader & ) const; - - uint32_t read(::duckdb_apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const; - - virtual void printTo(std::ostream& out) const; -}; - -void swap(DictionaryPageHeader &a, DictionaryPageHeader &b); - -std::ostream& operator<<(std::ostream& out, const DictionaryPageHeader& obj); - -typedef struct _DataPageHeaderV2__isset { - _DataPageHeaderV2__isset() : is_compressed(true), statistics(false) {} - bool is_compressed :1; - bool statistics :1; -} _DataPageHeaderV2__isset; - -class DataPageHeaderV2 : public virtual ::duckdb_apache::thrift::TBase { - public: - - DataPageHeaderV2(const DataPageHeaderV2&); - DataPageHeaderV2& operator=(const DataPageHeaderV2&); - DataPageHeaderV2() : num_values(0), num_nulls(0), num_rows(0), encoding((Encoding::type)0), definition_levels_byte_length(0), repetition_levels_byte_length(0), is_compressed(true) { - } - - virtual ~DataPageHeaderV2() throw(); - int32_t num_values; - int32_t num_nulls; - int32_t num_rows; - Encoding::type encoding; - int32_t definition_levels_byte_length; - int32_t repetition_levels_byte_length; - bool is_compressed; - Statistics statistics; - - _DataPageHeaderV2__isset __isset; - - void __set_num_values(const int32_t val); - - void __set_num_nulls(const int32_t val); - - void __set_num_rows(const int32_t val); - - void __set_encoding(const Encoding::type val); - - void __set_definition_levels_byte_length(const int32_t val); - - void __set_repetition_levels_byte_length(const int32_t val); - - void __set_is_compressed(const bool val); - - void __set_statistics(const Statistics& val); - - bool operator == (const DataPageHeaderV2 & rhs) const - { - if (!(num_values == rhs.num_values)) - return false; - if (!(num_nulls == rhs.num_nulls)) - return false; - if (!(num_rows == rhs.num_rows)) - return false; - if (!(encoding == rhs.encoding)) - return false; - if (!(definition_levels_byte_length == rhs.definition_levels_byte_length)) - return false; - if (!(repetition_levels_byte_length == rhs.repetition_levels_byte_length)) - return false; - if (__isset.is_compressed != rhs.__isset.is_compressed) - return false; - else if (__isset.is_compressed && !(is_compressed == rhs.is_compressed)) - return false; - if (__isset.statistics != rhs.__isset.statistics) - return false; - else if (__isset.statistics && !(statistics == rhs.statistics)) - return false; - return true; - } - bool operator != (const DataPageHeaderV2 &rhs) const { - return !(*this == rhs); - } - - bool operator < (const DataPageHeaderV2 & ) const; - - uint32_t read(::duckdb_apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const; - - virtual void printTo(std::ostream& out) const; -}; - -void swap(DataPageHeaderV2 &a, DataPageHeaderV2 &b); - -std::ostream& operator<<(std::ostream& out, const DataPageHeaderV2& obj); - -typedef struct _PageHeader__isset { - _PageHeader__isset() : crc(false), data_page_header(false), index_page_header(false), dictionary_page_header(false), data_page_header_v2(false) {} - bool crc :1; - bool data_page_header :1; - bool index_page_header :1; - bool dictionary_page_header :1; - bool data_page_header_v2 :1; -} _PageHeader__isset; - -class PageHeader : public virtual ::duckdb_apache::thrift::TBase { - public: - - PageHeader(const PageHeader&); - PageHeader& operator=(const PageHeader&); - PageHeader() : type((PageType::type)0), uncompressed_page_size(0), compressed_page_size(0), crc(0) { - } - - virtual ~PageHeader() throw(); - PageType::type type; - int32_t uncompressed_page_size; - int32_t compressed_page_size; - int32_t crc; - DataPageHeader data_page_header; - IndexPageHeader index_page_header; - DictionaryPageHeader dictionary_page_header; - DataPageHeaderV2 data_page_header_v2; - - _PageHeader__isset __isset; - - void __set_type(const PageType::type val); - - void __set_uncompressed_page_size(const int32_t val); - - void __set_compressed_page_size(const int32_t val); - - void __set_crc(const int32_t val); - - void __set_data_page_header(const DataPageHeader& val); - - void __set_index_page_header(const IndexPageHeader& val); - - void __set_dictionary_page_header(const DictionaryPageHeader& val); - - void __set_data_page_header_v2(const DataPageHeaderV2& val); - - bool operator == (const PageHeader & rhs) const - { - if (!(type == rhs.type)) - return false; - if (!(uncompressed_page_size == rhs.uncompressed_page_size)) - return false; - if (!(compressed_page_size == rhs.compressed_page_size)) - return false; - if (__isset.crc != rhs.__isset.crc) - return false; - else if (__isset.crc && !(crc == rhs.crc)) - return false; - if (__isset.data_page_header != rhs.__isset.data_page_header) - return false; - else if (__isset.data_page_header && !(data_page_header == rhs.data_page_header)) - return false; - if (__isset.index_page_header != rhs.__isset.index_page_header) - return false; - else if (__isset.index_page_header && !(index_page_header == rhs.index_page_header)) - return false; - if (__isset.dictionary_page_header != rhs.__isset.dictionary_page_header) - return false; - else if (__isset.dictionary_page_header && !(dictionary_page_header == rhs.dictionary_page_header)) - return false; - if (__isset.data_page_header_v2 != rhs.__isset.data_page_header_v2) - return false; - else if (__isset.data_page_header_v2 && !(data_page_header_v2 == rhs.data_page_header_v2)) - return false; - return true; - } - bool operator != (const PageHeader &rhs) const { - return !(*this == rhs); - } - - bool operator < (const PageHeader & ) const; - - uint32_t read(::duckdb_apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const; - - virtual void printTo(std::ostream& out) const; -}; - -void swap(PageHeader &a, PageHeader &b); - -std::ostream& operator<<(std::ostream& out, const PageHeader& obj); - -typedef struct _KeyValue__isset { - _KeyValue__isset() : value(false) {} - bool value :1; -} _KeyValue__isset; - -class KeyValue : public virtual ::duckdb_apache::thrift::TBase { - public: - - KeyValue(const KeyValue&); - KeyValue& operator=(const KeyValue&); - KeyValue() : key(), value() { - } - - virtual ~KeyValue() throw(); - std::string key; - std::string value; - - _KeyValue__isset __isset; - - void __set_key(const std::string& val); - - void __set_value(const std::string& val); - - bool operator == (const KeyValue & rhs) const - { - if (!(key == rhs.key)) - return false; - if (__isset.value != rhs.__isset.value) - return false; - else if (__isset.value && !(value == rhs.value)) - return false; - return true; - } - bool operator != (const KeyValue &rhs) const { - return !(*this == rhs); - } - - bool operator < (const KeyValue & ) const; - - uint32_t read(::duckdb_apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const; - - virtual void printTo(std::ostream& out) const; -}; - -void swap(KeyValue &a, KeyValue &b); - -std::ostream& operator<<(std::ostream& out, const KeyValue& obj); - - -class SortingColumn : public virtual ::duckdb_apache::thrift::TBase { - public: - - SortingColumn(const SortingColumn&); - SortingColumn& operator=(const SortingColumn&); - SortingColumn() : column_idx(0), descending(0), nulls_first(0) { - } - - virtual ~SortingColumn() throw(); - int32_t column_idx; - bool descending; - bool nulls_first; - - void __set_column_idx(const int32_t val); - - void __set_descending(const bool val); - - void __set_nulls_first(const bool val); - - bool operator == (const SortingColumn & rhs) const - { - if (!(column_idx == rhs.column_idx)) - return false; - if (!(descending == rhs.descending)) - return false; - if (!(nulls_first == rhs.nulls_first)) - return false; - return true; - } - bool operator != (const SortingColumn &rhs) const { - return !(*this == rhs); - } - - bool operator < (const SortingColumn & ) const; - - uint32_t read(::duckdb_apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const; - - virtual void printTo(std::ostream& out) const; -}; - -void swap(SortingColumn &a, SortingColumn &b); - -std::ostream& operator<<(std::ostream& out, const SortingColumn& obj); - - -class PageEncodingStats : public virtual ::duckdb_apache::thrift::TBase { - public: - - PageEncodingStats(const PageEncodingStats&); - PageEncodingStats& operator=(const PageEncodingStats&); - PageEncodingStats() : page_type((PageType::type)0), encoding((Encoding::type)0), count(0) { - } - - virtual ~PageEncodingStats() throw(); - PageType::type page_type; - Encoding::type encoding; - int32_t count; - - void __set_page_type(const PageType::type val); - - void __set_encoding(const Encoding::type val); - - void __set_count(const int32_t val); - - bool operator == (const PageEncodingStats & rhs) const - { - if (!(page_type == rhs.page_type)) - return false; - if (!(encoding == rhs.encoding)) - return false; - if (!(count == rhs.count)) - return false; - return true; - } - bool operator != (const PageEncodingStats &rhs) const { - return !(*this == rhs); - } - - bool operator < (const PageEncodingStats & ) const; - - uint32_t read(::duckdb_apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const; - - virtual void printTo(std::ostream& out) const; -}; - -void swap(PageEncodingStats &a, PageEncodingStats &b); - -std::ostream& operator<<(std::ostream& out, const PageEncodingStats& obj); - -typedef struct _ColumnMetaData__isset { - _ColumnMetaData__isset() : key_value_metadata(false), index_page_offset(false), dictionary_page_offset(false), statistics(false), encoding_stats(false) {} - bool key_value_metadata :1; - bool index_page_offset :1; - bool dictionary_page_offset :1; - bool statistics :1; - bool encoding_stats :1; -} _ColumnMetaData__isset; - -class ColumnMetaData : public virtual ::duckdb_apache::thrift::TBase { - public: - - ColumnMetaData(const ColumnMetaData&); - ColumnMetaData& operator=(const ColumnMetaData&); - ColumnMetaData() : type((Type::type)0), codec((CompressionCodec::type)0), num_values(0), total_uncompressed_size(0), total_compressed_size(0), data_page_offset(0), index_page_offset(0), dictionary_page_offset(0) { - } - - virtual ~ColumnMetaData() throw(); - Type::type type; - std::vector encodings; - std::vector path_in_schema; - CompressionCodec::type codec; - int64_t num_values; - int64_t total_uncompressed_size; - int64_t total_compressed_size; - std::vector key_value_metadata; - int64_t data_page_offset; - int64_t index_page_offset; - int64_t dictionary_page_offset; - Statistics statistics; - std::vector encoding_stats; - - _ColumnMetaData__isset __isset; - - void __set_type(const Type::type val); - - void __set_encodings(const std::vector & val); - - void __set_path_in_schema(const std::vector & val); - - void __set_codec(const CompressionCodec::type val); - - void __set_num_values(const int64_t val); - - void __set_total_uncompressed_size(const int64_t val); - - void __set_total_compressed_size(const int64_t val); - - void __set_key_value_metadata(const std::vector & val); - - void __set_data_page_offset(const int64_t val); - - void __set_index_page_offset(const int64_t val); - - void __set_dictionary_page_offset(const int64_t val); - - void __set_statistics(const Statistics& val); - - void __set_encoding_stats(const std::vector & val); - - bool operator == (const ColumnMetaData & rhs) const - { - if (!(type == rhs.type)) - return false; - if (!(encodings == rhs.encodings)) - return false; - if (!(path_in_schema == rhs.path_in_schema)) - return false; - if (!(codec == rhs.codec)) - return false; - if (!(num_values == rhs.num_values)) - return false; - if (!(total_uncompressed_size == rhs.total_uncompressed_size)) - return false; - if (!(total_compressed_size == rhs.total_compressed_size)) - return false; - if (__isset.key_value_metadata != rhs.__isset.key_value_metadata) - return false; - else if (__isset.key_value_metadata && !(key_value_metadata == rhs.key_value_metadata)) - return false; - if (!(data_page_offset == rhs.data_page_offset)) - return false; - if (__isset.index_page_offset != rhs.__isset.index_page_offset) - return false; - else if (__isset.index_page_offset && !(index_page_offset == rhs.index_page_offset)) - return false; - if (__isset.dictionary_page_offset != rhs.__isset.dictionary_page_offset) - return false; - else if (__isset.dictionary_page_offset && !(dictionary_page_offset == rhs.dictionary_page_offset)) - return false; - if (__isset.statistics != rhs.__isset.statistics) - return false; - else if (__isset.statistics && !(statistics == rhs.statistics)) - return false; - if (__isset.encoding_stats != rhs.__isset.encoding_stats) - return false; - else if (__isset.encoding_stats && !(encoding_stats == rhs.encoding_stats)) - return false; - return true; - } - bool operator != (const ColumnMetaData &rhs) const { - return !(*this == rhs); - } - - bool operator < (const ColumnMetaData & ) const; - - uint32_t read(::duckdb_apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const; - - virtual void printTo(std::ostream& out) const; -}; - -void swap(ColumnMetaData &a, ColumnMetaData &b); - -std::ostream& operator<<(std::ostream& out, const ColumnMetaData& obj); - - -class EncryptionWithFooterKey : public virtual ::duckdb_apache::thrift::TBase { - public: - - EncryptionWithFooterKey(const EncryptionWithFooterKey&); - EncryptionWithFooterKey& operator=(const EncryptionWithFooterKey&); - EncryptionWithFooterKey() { - } - - virtual ~EncryptionWithFooterKey() throw(); - - bool operator == (const EncryptionWithFooterKey & /* rhs */) const - { - return true; - } - bool operator != (const EncryptionWithFooterKey &rhs) const { - return !(*this == rhs); - } - - bool operator < (const EncryptionWithFooterKey & ) const; - - uint32_t read(::duckdb_apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const; - - virtual void printTo(std::ostream& out) const; -}; - -void swap(EncryptionWithFooterKey &a, EncryptionWithFooterKey &b); - -std::ostream& operator<<(std::ostream& out, const EncryptionWithFooterKey& obj); - -typedef struct _EncryptionWithColumnKey__isset { - _EncryptionWithColumnKey__isset() : key_metadata(false) {} - bool key_metadata :1; -} _EncryptionWithColumnKey__isset; - -class EncryptionWithColumnKey : public virtual ::duckdb_apache::thrift::TBase { - public: - - EncryptionWithColumnKey(const EncryptionWithColumnKey&); - EncryptionWithColumnKey& operator=(const EncryptionWithColumnKey&); - EncryptionWithColumnKey() : key_metadata() { - } - - virtual ~EncryptionWithColumnKey() throw(); - std::vector path_in_schema; - std::string key_metadata; - - _EncryptionWithColumnKey__isset __isset; - - void __set_path_in_schema(const std::vector & val); - - void __set_key_metadata(const std::string& val); - - bool operator == (const EncryptionWithColumnKey & rhs) const - { - if (!(path_in_schema == rhs.path_in_schema)) - return false; - if (__isset.key_metadata != rhs.__isset.key_metadata) - return false; - else if (__isset.key_metadata && !(key_metadata == rhs.key_metadata)) - return false; - return true; - } - bool operator != (const EncryptionWithColumnKey &rhs) const { - return !(*this == rhs); - } - - bool operator < (const EncryptionWithColumnKey & ) const; - - uint32_t read(::duckdb_apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const; - - virtual void printTo(std::ostream& out) const; -}; - -void swap(EncryptionWithColumnKey &a, EncryptionWithColumnKey &b); - -std::ostream& operator<<(std::ostream& out, const EncryptionWithColumnKey& obj); - -typedef struct _ColumnCryptoMetaData__isset { - _ColumnCryptoMetaData__isset() : ENCRYPTION_WITH_FOOTER_KEY(false), ENCRYPTION_WITH_COLUMN_KEY(false) {} - bool ENCRYPTION_WITH_FOOTER_KEY :1; - bool ENCRYPTION_WITH_COLUMN_KEY :1; -} _ColumnCryptoMetaData__isset; - -class ColumnCryptoMetaData : public virtual ::duckdb_apache::thrift::TBase { - public: - - ColumnCryptoMetaData(const ColumnCryptoMetaData&); - ColumnCryptoMetaData& operator=(const ColumnCryptoMetaData&); - ColumnCryptoMetaData() { - } - - virtual ~ColumnCryptoMetaData() throw(); - EncryptionWithFooterKey ENCRYPTION_WITH_FOOTER_KEY; - EncryptionWithColumnKey ENCRYPTION_WITH_COLUMN_KEY; - - _ColumnCryptoMetaData__isset __isset; - - void __set_ENCRYPTION_WITH_FOOTER_KEY(const EncryptionWithFooterKey& val); - - void __set_ENCRYPTION_WITH_COLUMN_KEY(const EncryptionWithColumnKey& val); - - bool operator == (const ColumnCryptoMetaData & rhs) const - { - if (__isset.ENCRYPTION_WITH_FOOTER_KEY != rhs.__isset.ENCRYPTION_WITH_FOOTER_KEY) - return false; - else if (__isset.ENCRYPTION_WITH_FOOTER_KEY && !(ENCRYPTION_WITH_FOOTER_KEY == rhs.ENCRYPTION_WITH_FOOTER_KEY)) - return false; - if (__isset.ENCRYPTION_WITH_COLUMN_KEY != rhs.__isset.ENCRYPTION_WITH_COLUMN_KEY) - return false; - else if (__isset.ENCRYPTION_WITH_COLUMN_KEY && !(ENCRYPTION_WITH_COLUMN_KEY == rhs.ENCRYPTION_WITH_COLUMN_KEY)) - return false; - return true; - } - bool operator != (const ColumnCryptoMetaData &rhs) const { - return !(*this == rhs); - } - - bool operator < (const ColumnCryptoMetaData & ) const; - - uint32_t read(::duckdb_apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const; - - virtual void printTo(std::ostream& out) const; -}; - -void swap(ColumnCryptoMetaData &a, ColumnCryptoMetaData &b); - -std::ostream& operator<<(std::ostream& out, const ColumnCryptoMetaData& obj); - -typedef struct _ColumnChunk__isset { - _ColumnChunk__isset() : file_path(false), meta_data(false), offset_index_offset(false), offset_index_length(false), column_index_offset(false), column_index_length(false), crypto_metadata(false), encrypted_column_metadata(false) {} - bool file_path :1; - bool meta_data :1; - bool offset_index_offset :1; - bool offset_index_length :1; - bool column_index_offset :1; - bool column_index_length :1; - bool crypto_metadata :1; - bool encrypted_column_metadata :1; -} _ColumnChunk__isset; - -class ColumnChunk : public virtual ::duckdb_apache::thrift::TBase { - public: - - ColumnChunk(const ColumnChunk&); - ColumnChunk& operator=(const ColumnChunk&); - ColumnChunk() : file_path(), file_offset(0), offset_index_offset(0), offset_index_length(0), column_index_offset(0), column_index_length(0), encrypted_column_metadata() { - } - - virtual ~ColumnChunk() throw(); - std::string file_path; - int64_t file_offset; - ColumnMetaData meta_data; - int64_t offset_index_offset; - int32_t offset_index_length; - int64_t column_index_offset; - int32_t column_index_length; - ColumnCryptoMetaData crypto_metadata; - std::string encrypted_column_metadata; - - _ColumnChunk__isset __isset; - - void __set_file_path(const std::string& val); - - void __set_file_offset(const int64_t val); - - void __set_meta_data(const ColumnMetaData& val); - - void __set_offset_index_offset(const int64_t val); - - void __set_offset_index_length(const int32_t val); - - void __set_column_index_offset(const int64_t val); - - void __set_column_index_length(const int32_t val); - - void __set_crypto_metadata(const ColumnCryptoMetaData& val); - - void __set_encrypted_column_metadata(const std::string& val); - - bool operator == (const ColumnChunk & rhs) const - { - if (__isset.file_path != rhs.__isset.file_path) - return false; - else if (__isset.file_path && !(file_path == rhs.file_path)) - return false; - if (!(file_offset == rhs.file_offset)) - return false; - if (__isset.meta_data != rhs.__isset.meta_data) - return false; - else if (__isset.meta_data && !(meta_data == rhs.meta_data)) - return false; - if (__isset.offset_index_offset != rhs.__isset.offset_index_offset) - return false; - else if (__isset.offset_index_offset && !(offset_index_offset == rhs.offset_index_offset)) - return false; - if (__isset.offset_index_length != rhs.__isset.offset_index_length) - return false; - else if (__isset.offset_index_length && !(offset_index_length == rhs.offset_index_length)) - return false; - if (__isset.column_index_offset != rhs.__isset.column_index_offset) - return false; - else if (__isset.column_index_offset && !(column_index_offset == rhs.column_index_offset)) - return false; - if (__isset.column_index_length != rhs.__isset.column_index_length) - return false; - else if (__isset.column_index_length && !(column_index_length == rhs.column_index_length)) - return false; - if (__isset.crypto_metadata != rhs.__isset.crypto_metadata) - return false; - else if (__isset.crypto_metadata && !(crypto_metadata == rhs.crypto_metadata)) - return false; - if (__isset.encrypted_column_metadata != rhs.__isset.encrypted_column_metadata) - return false; - else if (__isset.encrypted_column_metadata && !(encrypted_column_metadata == rhs.encrypted_column_metadata)) - return false; - return true; - } - bool operator != (const ColumnChunk &rhs) const { - return !(*this == rhs); - } - - bool operator < (const ColumnChunk & ) const; - - uint32_t read(::duckdb_apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const; - - virtual void printTo(std::ostream& out) const; -}; - -void swap(ColumnChunk &a, ColumnChunk &b); - -std::ostream& operator<<(std::ostream& out, const ColumnChunk& obj); - -typedef struct _RowGroup__isset { - _RowGroup__isset() : sorting_columns(false), file_offset(false), total_compressed_size(false), ordinal(false) {} - bool sorting_columns :1; - bool file_offset :1; - bool total_compressed_size :1; - bool ordinal :1; -} _RowGroup__isset; - -class RowGroup : public virtual ::duckdb_apache::thrift::TBase { - public: - - RowGroup(const RowGroup&); - RowGroup& operator=(const RowGroup&); - RowGroup() : total_byte_size(0), num_rows(0), file_offset(0), total_compressed_size(0), ordinal(0) { - } - - virtual ~RowGroup() throw(); - std::vector columns; - int64_t total_byte_size; - int64_t num_rows; - std::vector sorting_columns; - int64_t file_offset; - int64_t total_compressed_size; - int16_t ordinal; - - _RowGroup__isset __isset; - - void __set_columns(const std::vector & val); - - void __set_total_byte_size(const int64_t val); - - void __set_num_rows(const int64_t val); - - void __set_sorting_columns(const std::vector & val); - - void __set_file_offset(const int64_t val); - - void __set_total_compressed_size(const int64_t val); - - void __set_ordinal(const int16_t val); - - bool operator == (const RowGroup & rhs) const - { - if (!(columns == rhs.columns)) - return false; - if (!(total_byte_size == rhs.total_byte_size)) - return false; - if (!(num_rows == rhs.num_rows)) - return false; - if (__isset.sorting_columns != rhs.__isset.sorting_columns) - return false; - else if (__isset.sorting_columns && !(sorting_columns == rhs.sorting_columns)) - return false; - if (__isset.file_offset != rhs.__isset.file_offset) - return false; - else if (__isset.file_offset && !(file_offset == rhs.file_offset)) - return false; - if (__isset.total_compressed_size != rhs.__isset.total_compressed_size) - return false; - else if (__isset.total_compressed_size && !(total_compressed_size == rhs.total_compressed_size)) - return false; - if (__isset.ordinal != rhs.__isset.ordinal) - return false; - else if (__isset.ordinal && !(ordinal == rhs.ordinal)) - return false; - return true; - } - bool operator != (const RowGroup &rhs) const { - return !(*this == rhs); - } - - bool operator < (const RowGroup & ) const; - - uint32_t read(::duckdb_apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const; - - virtual void printTo(std::ostream& out) const; -}; - -void swap(RowGroup &a, RowGroup &b); - -std::ostream& operator<<(std::ostream& out, const RowGroup& obj); - - -class TypeDefinedOrder : public virtual ::duckdb_apache::thrift::TBase { - public: - - TypeDefinedOrder(const TypeDefinedOrder&); - TypeDefinedOrder& operator=(const TypeDefinedOrder&); - TypeDefinedOrder() { - } - - virtual ~TypeDefinedOrder() throw(); - - bool operator == (const TypeDefinedOrder & /* rhs */) const - { - return true; - } - bool operator != (const TypeDefinedOrder &rhs) const { - return !(*this == rhs); - } - - bool operator < (const TypeDefinedOrder & ) const; - - uint32_t read(::duckdb_apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const; - - virtual void printTo(std::ostream& out) const; -}; - -void swap(TypeDefinedOrder &a, TypeDefinedOrder &b); - -std::ostream& operator<<(std::ostream& out, const TypeDefinedOrder& obj); - -typedef struct _ColumnOrder__isset { - _ColumnOrder__isset() : TYPE_ORDER(false) {} - bool TYPE_ORDER :1; -} _ColumnOrder__isset; - -class ColumnOrder : public virtual ::duckdb_apache::thrift::TBase { - public: - - ColumnOrder(const ColumnOrder&); - ColumnOrder& operator=(const ColumnOrder&); - ColumnOrder() { - } - - virtual ~ColumnOrder() throw(); - TypeDefinedOrder TYPE_ORDER; - - _ColumnOrder__isset __isset; - - void __set_TYPE_ORDER(const TypeDefinedOrder& val); - - bool operator == (const ColumnOrder & rhs) const - { - if (__isset.TYPE_ORDER != rhs.__isset.TYPE_ORDER) - return false; - else if (__isset.TYPE_ORDER && !(TYPE_ORDER == rhs.TYPE_ORDER)) - return false; - return true; - } - bool operator != (const ColumnOrder &rhs) const { - return !(*this == rhs); - } - - bool operator < (const ColumnOrder & ) const; - - uint32_t read(::duckdb_apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const; - - virtual void printTo(std::ostream& out) const; -}; - -void swap(ColumnOrder &a, ColumnOrder &b); - -std::ostream& operator<<(std::ostream& out, const ColumnOrder& obj); - - -class PageLocation : public virtual ::duckdb_apache::thrift::TBase { - public: - - PageLocation(const PageLocation&); - PageLocation& operator=(const PageLocation&); - PageLocation() : offset(0), compressed_page_size(0), first_row_index(0) { - } - - virtual ~PageLocation() throw(); - int64_t offset; - int32_t compressed_page_size; - int64_t first_row_index; - - void __set_offset(const int64_t val); - - void __set_compressed_page_size(const int32_t val); - - void __set_first_row_index(const int64_t val); - - bool operator == (const PageLocation & rhs) const - { - if (!(offset == rhs.offset)) - return false; - if (!(compressed_page_size == rhs.compressed_page_size)) - return false; - if (!(first_row_index == rhs.first_row_index)) - return false; - return true; - } - bool operator != (const PageLocation &rhs) const { - return !(*this == rhs); - } - - bool operator < (const PageLocation & ) const; - - uint32_t read(::duckdb_apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const; - - virtual void printTo(std::ostream& out) const; -}; - -void swap(PageLocation &a, PageLocation &b); - -std::ostream& operator<<(std::ostream& out, const PageLocation& obj); - - -class OffsetIndex : public virtual ::duckdb_apache::thrift::TBase { - public: - - OffsetIndex(const OffsetIndex&); - OffsetIndex& operator=(const OffsetIndex&); - OffsetIndex() { - } - - virtual ~OffsetIndex() throw(); - std::vector page_locations; - - void __set_page_locations(const std::vector & val); - - bool operator == (const OffsetIndex & rhs) const - { - if (!(page_locations == rhs.page_locations)) - return false; - return true; - } - bool operator != (const OffsetIndex &rhs) const { - return !(*this == rhs); - } - - bool operator < (const OffsetIndex & ) const; - - uint32_t read(::duckdb_apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const; - - virtual void printTo(std::ostream& out) const; -}; - -void swap(OffsetIndex &a, OffsetIndex &b); - -std::ostream& operator<<(std::ostream& out, const OffsetIndex& obj); - -typedef struct _ColumnIndex__isset { - _ColumnIndex__isset() : null_counts(false) {} - bool null_counts :1; -} _ColumnIndex__isset; - -class ColumnIndex : public virtual ::duckdb_apache::thrift::TBase { - public: - - ColumnIndex(const ColumnIndex&); - ColumnIndex& operator=(const ColumnIndex&); - ColumnIndex() : boundary_order((BoundaryOrder::type)0) { - } - - virtual ~ColumnIndex() throw(); - std::vector null_pages; - std::vector min_values; - std::vector max_values; - BoundaryOrder::type boundary_order; - std::vector null_counts; - - _ColumnIndex__isset __isset; - - void __set_null_pages(const std::vector & val); - - void __set_min_values(const std::vector & val); - - void __set_max_values(const std::vector & val); - - void __set_boundary_order(const BoundaryOrder::type val); - - void __set_null_counts(const std::vector & val); - - bool operator == (const ColumnIndex & rhs) const - { - if (!(null_pages == rhs.null_pages)) - return false; - if (!(min_values == rhs.min_values)) - return false; - if (!(max_values == rhs.max_values)) - return false; - if (!(boundary_order == rhs.boundary_order)) - return false; - if (__isset.null_counts != rhs.__isset.null_counts) - return false; - else if (__isset.null_counts && !(null_counts == rhs.null_counts)) - return false; - return true; - } - bool operator != (const ColumnIndex &rhs) const { - return !(*this == rhs); - } - - bool operator < (const ColumnIndex & ) const; - - uint32_t read(::duckdb_apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const; - - virtual void printTo(std::ostream& out) const; -}; - -void swap(ColumnIndex &a, ColumnIndex &b); - -std::ostream& operator<<(std::ostream& out, const ColumnIndex& obj); - -typedef struct _AesGcmV1__isset { - _AesGcmV1__isset() : aad_prefix(false), aad_file_unique(false), supply_aad_prefix(false) {} - bool aad_prefix :1; - bool aad_file_unique :1; - bool supply_aad_prefix :1; -} _AesGcmV1__isset; - -class AesGcmV1 : public virtual ::duckdb_apache::thrift::TBase { - public: - - AesGcmV1(const AesGcmV1&); - AesGcmV1& operator=(const AesGcmV1&); - AesGcmV1() : aad_prefix(), aad_file_unique(), supply_aad_prefix(0) { - } - - virtual ~AesGcmV1() throw(); - std::string aad_prefix; - std::string aad_file_unique; - bool supply_aad_prefix; - - _AesGcmV1__isset __isset; - - void __set_aad_prefix(const std::string& val); - - void __set_aad_file_unique(const std::string& val); - - void __set_supply_aad_prefix(const bool val); - - bool operator == (const AesGcmV1 & rhs) const - { - if (__isset.aad_prefix != rhs.__isset.aad_prefix) - return false; - else if (__isset.aad_prefix && !(aad_prefix == rhs.aad_prefix)) - return false; - if (__isset.aad_file_unique != rhs.__isset.aad_file_unique) - return false; - else if (__isset.aad_file_unique && !(aad_file_unique == rhs.aad_file_unique)) - return false; - if (__isset.supply_aad_prefix != rhs.__isset.supply_aad_prefix) - return false; - else if (__isset.supply_aad_prefix && !(supply_aad_prefix == rhs.supply_aad_prefix)) - return false; - return true; - } - bool operator != (const AesGcmV1 &rhs) const { - return !(*this == rhs); - } - - bool operator < (const AesGcmV1 & ) const; - - uint32_t read(::duckdb_apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const; - - virtual void printTo(std::ostream& out) const; -}; - -void swap(AesGcmV1 &a, AesGcmV1 &b); - -std::ostream& operator<<(std::ostream& out, const AesGcmV1& obj); - -typedef struct _AesGcmCtrV1__isset { - _AesGcmCtrV1__isset() : aad_prefix(false), aad_file_unique(false), supply_aad_prefix(false) {} - bool aad_prefix :1; - bool aad_file_unique :1; - bool supply_aad_prefix :1; -} _AesGcmCtrV1__isset; - -class AesGcmCtrV1 : public virtual ::duckdb_apache::thrift::TBase { - public: - - AesGcmCtrV1(const AesGcmCtrV1&); - AesGcmCtrV1& operator=(const AesGcmCtrV1&); - AesGcmCtrV1() : aad_prefix(), aad_file_unique(), supply_aad_prefix(0) { - } - - virtual ~AesGcmCtrV1() throw(); - std::string aad_prefix; - std::string aad_file_unique; - bool supply_aad_prefix; - - _AesGcmCtrV1__isset __isset; - - void __set_aad_prefix(const std::string& val); - - void __set_aad_file_unique(const std::string& val); - - void __set_supply_aad_prefix(const bool val); - - bool operator == (const AesGcmCtrV1 & rhs) const - { - if (__isset.aad_prefix != rhs.__isset.aad_prefix) - return false; - else if (__isset.aad_prefix && !(aad_prefix == rhs.aad_prefix)) - return false; - if (__isset.aad_file_unique != rhs.__isset.aad_file_unique) - return false; - else if (__isset.aad_file_unique && !(aad_file_unique == rhs.aad_file_unique)) - return false; - if (__isset.supply_aad_prefix != rhs.__isset.supply_aad_prefix) - return false; - else if (__isset.supply_aad_prefix && !(supply_aad_prefix == rhs.supply_aad_prefix)) - return false; - return true; - } - bool operator != (const AesGcmCtrV1 &rhs) const { - return !(*this == rhs); - } - - bool operator < (const AesGcmCtrV1 & ) const; - - uint32_t read(::duckdb_apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const; - - virtual void printTo(std::ostream& out) const; -}; - -void swap(AesGcmCtrV1 &a, AesGcmCtrV1 &b); - -std::ostream& operator<<(std::ostream& out, const AesGcmCtrV1& obj); - -typedef struct _EncryptionAlgorithm__isset { - _EncryptionAlgorithm__isset() : AES_GCM_V1(false), AES_GCM_CTR_V1(false) {} - bool AES_GCM_V1 :1; - bool AES_GCM_CTR_V1 :1; -} _EncryptionAlgorithm__isset; - -class EncryptionAlgorithm : public virtual ::duckdb_apache::thrift::TBase { - public: - - EncryptionAlgorithm(const EncryptionAlgorithm&); - EncryptionAlgorithm& operator=(const EncryptionAlgorithm&); - EncryptionAlgorithm() { - } - - virtual ~EncryptionAlgorithm() throw(); - AesGcmV1 AES_GCM_V1; - AesGcmCtrV1 AES_GCM_CTR_V1; - - _EncryptionAlgorithm__isset __isset; - - void __set_AES_GCM_V1(const AesGcmV1& val); - - void __set_AES_GCM_CTR_V1(const AesGcmCtrV1& val); - - bool operator == (const EncryptionAlgorithm & rhs) const - { - if (__isset.AES_GCM_V1 != rhs.__isset.AES_GCM_V1) - return false; - else if (__isset.AES_GCM_V1 && !(AES_GCM_V1 == rhs.AES_GCM_V1)) - return false; - if (__isset.AES_GCM_CTR_V1 != rhs.__isset.AES_GCM_CTR_V1) - return false; - else if (__isset.AES_GCM_CTR_V1 && !(AES_GCM_CTR_V1 == rhs.AES_GCM_CTR_V1)) - return false; - return true; - } - bool operator != (const EncryptionAlgorithm &rhs) const { - return !(*this == rhs); - } - - bool operator < (const EncryptionAlgorithm & ) const; - - uint32_t read(::duckdb_apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const; - - virtual void printTo(std::ostream& out) const; -}; - -void swap(EncryptionAlgorithm &a, EncryptionAlgorithm &b); - -std::ostream& operator<<(std::ostream& out, const EncryptionAlgorithm& obj); - -typedef struct _FileMetaData__isset { - _FileMetaData__isset() : key_value_metadata(false), created_by(false), column_orders(false), encryption_algorithm(false), footer_signing_key_metadata(false) {} - bool key_value_metadata :1; - bool created_by :1; - bool column_orders :1; - bool encryption_algorithm :1; - bool footer_signing_key_metadata :1; -} _FileMetaData__isset; - -class FileMetaData : public virtual ::duckdb_apache::thrift::TBase { - public: - - FileMetaData(const FileMetaData&); - FileMetaData& operator=(const FileMetaData&); - FileMetaData() : version(0), num_rows(0), created_by(), footer_signing_key_metadata() { - } - - virtual ~FileMetaData() throw(); - int32_t version; - std::vector schema; - int64_t num_rows; - std::vector row_groups; - std::vector key_value_metadata; - std::string created_by; - std::vector column_orders; - EncryptionAlgorithm encryption_algorithm; - std::string footer_signing_key_metadata; - - _FileMetaData__isset __isset; - - void __set_version(const int32_t val); - - void __set_schema(const std::vector & val); - - void __set_num_rows(const int64_t val); - - void __set_row_groups(const std::vector & val); - - void __set_key_value_metadata(const std::vector & val); - - void __set_created_by(const std::string& val); - - void __set_column_orders(const std::vector & val); - - void __set_encryption_algorithm(const EncryptionAlgorithm& val); - - void __set_footer_signing_key_metadata(const std::string& val); - - bool operator == (const FileMetaData & rhs) const - { - if (!(version == rhs.version)) - return false; - if (!(schema == rhs.schema)) - return false; - if (!(num_rows == rhs.num_rows)) - return false; - if (!(row_groups == rhs.row_groups)) - return false; - if (__isset.key_value_metadata != rhs.__isset.key_value_metadata) - return false; - else if (__isset.key_value_metadata && !(key_value_metadata == rhs.key_value_metadata)) - return false; - if (__isset.created_by != rhs.__isset.created_by) - return false; - else if (__isset.created_by && !(created_by == rhs.created_by)) - return false; - if (__isset.column_orders != rhs.__isset.column_orders) - return false; - else if (__isset.column_orders && !(column_orders == rhs.column_orders)) - return false; - if (__isset.encryption_algorithm != rhs.__isset.encryption_algorithm) - return false; - else if (__isset.encryption_algorithm && !(encryption_algorithm == rhs.encryption_algorithm)) - return false; - if (__isset.footer_signing_key_metadata != rhs.__isset.footer_signing_key_metadata) - return false; - else if (__isset.footer_signing_key_metadata && !(footer_signing_key_metadata == rhs.footer_signing_key_metadata)) - return false; - return true; - } - bool operator != (const FileMetaData &rhs) const { - return !(*this == rhs); - } - - bool operator < (const FileMetaData & ) const; - - uint32_t read(::duckdb_apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const; - - virtual void printTo(std::ostream& out) const; -}; - -void swap(FileMetaData &a, FileMetaData &b); - -std::ostream& operator<<(std::ostream& out, const FileMetaData& obj); - -typedef struct _FileCryptoMetaData__isset { - _FileCryptoMetaData__isset() : key_metadata(false) {} - bool key_metadata :1; -} _FileCryptoMetaData__isset; - -class FileCryptoMetaData : public virtual ::duckdb_apache::thrift::TBase { - public: - - FileCryptoMetaData(const FileCryptoMetaData&); - FileCryptoMetaData& operator=(const FileCryptoMetaData&); - FileCryptoMetaData() : key_metadata() { - } - - virtual ~FileCryptoMetaData() throw(); - EncryptionAlgorithm encryption_algorithm; - std::string key_metadata; - - _FileCryptoMetaData__isset __isset; - - void __set_encryption_algorithm(const EncryptionAlgorithm& val); - - void __set_key_metadata(const std::string& val); - - bool operator == (const FileCryptoMetaData & rhs) const - { - if (!(encryption_algorithm == rhs.encryption_algorithm)) - return false; - if (__isset.key_metadata != rhs.__isset.key_metadata) - return false; - else if (__isset.key_metadata && !(key_metadata == rhs.key_metadata)) - return false; - return true; - } - bool operator != (const FileCryptoMetaData &rhs) const { - return !(*this == rhs); - } - - bool operator < (const FileCryptoMetaData & ) const; - - uint32_t read(::duckdb_apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const; - - virtual void printTo(std::ostream& out) const; -}; - -void swap(FileCryptoMetaData &a, FileCryptoMetaData &b); - -std::ostream& operator<<(std::ostream& out, const FileCryptoMetaData& obj); - -}} // namespace - -#endif - - -// LICENSE_CHANGE_END - - -#include - - -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #2 -// See the end of this file for a list - -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -#ifndef _DUCKDB_THRIFT_PROTOCOL_TCOMPACTPROTOCOL_H_ -#define _DUCKDB_THRIFT_PROTOCOL_TCOMPACTPROTOCOL_H_ 1 - - - -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #2 -// See the end of this file for a list - -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -#ifndef _DUCKDB_THRIFT_PROTOCOL_TVIRTUALPROTOCOL_H_ -#define _DUCKDB_THRIFT_PROTOCOL_TVIRTUALPROTOCOL_H_ 1 - - - -namespace duckdb_apache { -namespace thrift { -namespace protocol { - -using duckdb_apache::thrift::transport::TTransport; - -/** - * Helper class that provides default implementations of TProtocol methods. - * - * This class provides default implementations of the non-virtual TProtocol - * methods. It exists primarily so TVirtualProtocol can derive from it. It - * prevents TVirtualProtocol methods from causing infinite recursion if the - * non-virtual methods are not overridden by the TVirtualProtocol subclass. - * - * You probably don't want to use this class directly. Use TVirtualProtocol - * instead. - */ -class TProtocolDefaults : public TProtocol { -public: - uint32_t readMessageBegin(std::string& name, TMessageType& messageType, int32_t& seqid) { - (void)name; - (void)messageType; - (void)seqid; - throw TProtocolException(TProtocolException::NOT_IMPLEMENTED, - "this protocol does not support reading (yet)."); - } - - uint32_t readMessageEnd() { - throw TProtocolException(TProtocolException::NOT_IMPLEMENTED, - "this protocol does not support reading (yet)."); - } - - uint32_t readStructBegin(std::string& name) { - (void)name; - throw TProtocolException(TProtocolException::NOT_IMPLEMENTED, - "this protocol does not support reading (yet)."); - } - - uint32_t readStructEnd() { - throw TProtocolException(TProtocolException::NOT_IMPLEMENTED, - "this protocol does not support reading (yet)."); - } - - uint32_t readFieldBegin(std::string& name, TType& fieldType, int16_t& fieldId) { - (void)name; - (void)fieldType; - (void)fieldId; - throw TProtocolException(TProtocolException::NOT_IMPLEMENTED, - "this protocol does not support reading (yet)."); - } - - uint32_t readFieldEnd() { - throw TProtocolException(TProtocolException::NOT_IMPLEMENTED, - "this protocol does not support reading (yet)."); - } - - uint32_t readMapBegin(TType& keyType, TType& valType, uint32_t& size) { - (void)keyType; - (void)valType; - (void)size; - throw TProtocolException(TProtocolException::NOT_IMPLEMENTED, - "this protocol does not support reading (yet)."); - } - - uint32_t readMapEnd() { - throw TProtocolException(TProtocolException::NOT_IMPLEMENTED, - "this protocol does not support reading (yet)."); - } - - uint32_t readListBegin(TType& elemType, uint32_t& size) { - (void)elemType; - (void)size; - throw TProtocolException(TProtocolException::NOT_IMPLEMENTED, - "this protocol does not support reading (yet)."); - } - - uint32_t readListEnd() { - throw TProtocolException(TProtocolException::NOT_IMPLEMENTED, - "this protocol does not support reading (yet)."); - } - - uint32_t readSetBegin(TType& elemType, uint32_t& size) { - (void)elemType; - (void)size; - throw TProtocolException(TProtocolException::NOT_IMPLEMENTED, - "this protocol does not support reading (yet)."); - } - - uint32_t readSetEnd() { - throw TProtocolException(TProtocolException::NOT_IMPLEMENTED, - "this protocol does not support reading (yet)."); - } - - uint32_t readBool(bool& value) { - (void)value; - throw TProtocolException(TProtocolException::NOT_IMPLEMENTED, - "this protocol does not support reading (yet)."); - } - - uint32_t readBool(std::vector::reference value) { - (void)value; - throw TProtocolException(TProtocolException::NOT_IMPLEMENTED, - "this protocol does not support reading (yet)."); - } - - uint32_t readByte(int8_t& byte) { - (void)byte; - throw TProtocolException(TProtocolException::NOT_IMPLEMENTED, - "this protocol does not support reading (yet)."); - } - - uint32_t readI16(int16_t& i16) { - (void)i16; - throw TProtocolException(TProtocolException::NOT_IMPLEMENTED, - "this protocol does not support reading (yet)."); - } - - uint32_t readI32(int32_t& i32) { - (void)i32; - throw TProtocolException(TProtocolException::NOT_IMPLEMENTED, - "this protocol does not support reading (yet)."); - } - - uint32_t readI64(int64_t& i64) { - (void)i64; - throw TProtocolException(TProtocolException::NOT_IMPLEMENTED, - "this protocol does not support reading (yet)."); - } - - uint32_t readDouble(double& dub) { - (void)dub; - throw TProtocolException(TProtocolException::NOT_IMPLEMENTED, - "this protocol does not support reading (yet)."); - } - - uint32_t readString(std::string& str) { - (void)str; - throw TProtocolException(TProtocolException::NOT_IMPLEMENTED, - "this protocol does not support reading (yet)."); - } - - uint32_t readBinary(std::string& str) { - (void)str; - throw TProtocolException(TProtocolException::NOT_IMPLEMENTED, - "this protocol does not support reading (yet)."); - } - - uint32_t writeMessageBegin(const std::string& name, - const TMessageType messageType, - const int32_t seqid) { - (void)name; - (void)messageType; - (void)seqid; - throw TProtocolException(TProtocolException::NOT_IMPLEMENTED, - "this protocol does not support writing (yet)."); - } - - uint32_t writeMessageEnd() { - throw TProtocolException(TProtocolException::NOT_IMPLEMENTED, - "this protocol does not support writing (yet)."); - } - - uint32_t writeStructBegin(const char* name) { - (void)name; - throw TProtocolException(TProtocolException::NOT_IMPLEMENTED, - "this protocol does not support writing (yet)."); - } - - uint32_t writeStructEnd() { - throw TProtocolException(TProtocolException::NOT_IMPLEMENTED, - "this protocol does not support writing (yet)."); - } - - uint32_t writeFieldBegin(const char* name, const TType fieldType, const int16_t fieldId) { - (void)name; - (void)fieldType; - (void)fieldId; - throw TProtocolException(TProtocolException::NOT_IMPLEMENTED, - "this protocol does not support writing (yet)."); - } - - uint32_t writeFieldEnd() { - throw TProtocolException(TProtocolException::NOT_IMPLEMENTED, - "this protocol does not support writing (yet)."); - } - - uint32_t writeFieldStop() { - throw TProtocolException(TProtocolException::NOT_IMPLEMENTED, - "this protocol does not support writing (yet)."); - } - - uint32_t writeMapBegin(const TType keyType, const TType valType, const uint32_t size) { - (void)keyType; - (void)valType; - (void)size; - throw TProtocolException(TProtocolException::NOT_IMPLEMENTED, - "this protocol does not support writing (yet)."); - } - - uint32_t writeMapEnd() { - throw TProtocolException(TProtocolException::NOT_IMPLEMENTED, - "this protocol does not support writing (yet)."); - } - - uint32_t writeListBegin(const TType elemType, const uint32_t size) { - (void)elemType; - (void)size; - throw TProtocolException(TProtocolException::NOT_IMPLEMENTED, - "this protocol does not support writing (yet)."); - } - - uint32_t writeListEnd() { - throw TProtocolException(TProtocolException::NOT_IMPLEMENTED, - "this protocol does not support writing (yet)."); - } - - uint32_t writeSetBegin(const TType elemType, const uint32_t size) { - (void)elemType; - (void)size; - throw TProtocolException(TProtocolException::NOT_IMPLEMENTED, - "this protocol does not support writing (yet)."); - } - - uint32_t writeSetEnd() { - throw TProtocolException(TProtocolException::NOT_IMPLEMENTED, - "this protocol does not support writing (yet)."); - } - - uint32_t writeBool(const bool value) { - (void)value; - throw TProtocolException(TProtocolException::NOT_IMPLEMENTED, - "this protocol does not support writing (yet)."); - } - - uint32_t writeByte(const int8_t byte) { - (void)byte; - throw TProtocolException(TProtocolException::NOT_IMPLEMENTED, - "this protocol does not support writing (yet)."); - } - - uint32_t writeI16(const int16_t i16) { - (void)i16; - throw TProtocolException(TProtocolException::NOT_IMPLEMENTED, - "this protocol does not support writing (yet)."); - } - - uint32_t writeI32(const int32_t i32) { - (void)i32; - throw TProtocolException(TProtocolException::NOT_IMPLEMENTED, - "this protocol does not support writing (yet)."); - } - - uint32_t writeI64(const int64_t i64) { - (void)i64; - throw TProtocolException(TProtocolException::NOT_IMPLEMENTED, - "this protocol does not support writing (yet)."); - } - - uint32_t writeDouble(const double dub) { - (void)dub; - throw TProtocolException(TProtocolException::NOT_IMPLEMENTED, - "this protocol does not support writing (yet)."); - } - - uint32_t writeString(const std::string& str) { - (void)str; - throw TProtocolException(TProtocolException::NOT_IMPLEMENTED, - "this protocol does not support writing (yet)."); - } - - uint32_t writeBinary(const std::string& str) { - (void)str; - throw TProtocolException(TProtocolException::NOT_IMPLEMENTED, - "this protocol does not support writing (yet)."); - } - - uint32_t skip(TType type) { return ::duckdb_apache::thrift::protocol::skip(*this, type); } - -protected: - TProtocolDefaults(std::shared_ptr ptrans) : TProtocol(ptrans) {} -}; - -/** - * Concrete TProtocol classes should inherit from TVirtualProtocol - * so they don't have to manually override virtual methods. - */ -template -class TVirtualProtocol : public Super_ { -public: - /** - * Writing functions. - */ - - uint32_t writeMessageBegin_virt(const std::string& name, - const TMessageType messageType, - const int32_t seqid) override { - return static_cast(this)->writeMessageBegin(name, messageType, seqid); - } - - uint32_t writeMessageEnd_virt() override { - return static_cast(this)->writeMessageEnd(); - } - - uint32_t writeStructBegin_virt(const char* name) override { - return static_cast(this)->writeStructBegin(name); - } - - uint32_t writeStructEnd_virt() override { return static_cast(this)->writeStructEnd(); } - - uint32_t writeFieldBegin_virt(const char* name, - const TType fieldType, - const int16_t fieldId) override { - return static_cast(this)->writeFieldBegin(name, fieldType, fieldId); - } - - uint32_t writeFieldEnd_virt() override { return static_cast(this)->writeFieldEnd(); } - - uint32_t writeFieldStop_virt() override { return static_cast(this)->writeFieldStop(); } - - uint32_t writeMapBegin_virt(const TType keyType, - const TType valType, - const uint32_t size) override { - return static_cast(this)->writeMapBegin(keyType, valType, size); - } - - uint32_t writeMapEnd_virt() override { return static_cast(this)->writeMapEnd(); } - - uint32_t writeListBegin_virt(const TType elemType, const uint32_t size) override { - return static_cast(this)->writeListBegin(elemType, size); - } - - uint32_t writeListEnd_virt() override { return static_cast(this)->writeListEnd(); } - - uint32_t writeSetBegin_virt(const TType elemType, const uint32_t size) override { - return static_cast(this)->writeSetBegin(elemType, size); - } - - uint32_t writeSetEnd_virt() override { return static_cast(this)->writeSetEnd(); } - - uint32_t writeBool_virt(const bool value) override { - return static_cast(this)->writeBool(value); - } - - uint32_t writeByte_virt(const int8_t byte) override { - return static_cast(this)->writeByte(byte); - } - - uint32_t writeI16_virt(const int16_t i16) override { - return static_cast(this)->writeI16(i16); - } - - uint32_t writeI32_virt(const int32_t i32) override { - return static_cast(this)->writeI32(i32); - } - - uint32_t writeI64_virt(const int64_t i64) override { - return static_cast(this)->writeI64(i64); - } - - uint32_t writeDouble_virt(const double dub) override { - return static_cast(this)->writeDouble(dub); - } - - uint32_t writeString_virt(const std::string& str) override { - return static_cast(this)->writeString(str); - } - - uint32_t writeBinary_virt(const std::string& str) override { - return static_cast(this)->writeBinary(str); - } - - /** - * Reading functions - */ - - uint32_t readMessageBegin_virt(std::string& name, - TMessageType& messageType, - int32_t& seqid) override { - return static_cast(this)->readMessageBegin(name, messageType, seqid); - } - - uint32_t readMessageEnd_virt() override { return static_cast(this)->readMessageEnd(); } - - uint32_t readStructBegin_virt(std::string& name) override { - return static_cast(this)->readStructBegin(name); - } - - uint32_t readStructEnd_virt() override { return static_cast(this)->readStructEnd(); } - - uint32_t readFieldBegin_virt(std::string& name, TType& fieldType, int16_t& fieldId) override { - return static_cast(this)->readFieldBegin(name, fieldType, fieldId); - } - - uint32_t readFieldEnd_virt() override { return static_cast(this)->readFieldEnd(); } - - uint32_t readMapBegin_virt(TType& keyType, TType& valType, uint32_t& size) override { - return static_cast(this)->readMapBegin(keyType, valType, size); - } - - uint32_t readMapEnd_virt() override { return static_cast(this)->readMapEnd(); } - - uint32_t readListBegin_virt(TType& elemType, uint32_t& size) override { - return static_cast(this)->readListBegin(elemType, size); - } - - uint32_t readListEnd_virt() override { return static_cast(this)->readListEnd(); } - - uint32_t readSetBegin_virt(TType& elemType, uint32_t& size) override { - return static_cast(this)->readSetBegin(elemType, size); - } - - uint32_t readSetEnd_virt() override { return static_cast(this)->readSetEnd(); } - - uint32_t readBool_virt(bool& value) override { - return static_cast(this)->readBool(value); - } - - uint32_t readBool_virt(std::vector::reference value) override { - return static_cast(this)->readBool(value); - } - - uint32_t readByte_virt(int8_t& byte) override { - return static_cast(this)->readByte(byte); - } - - uint32_t readI16_virt(int16_t& i16) override { - return static_cast(this)->readI16(i16); - } - - uint32_t readI32_virt(int32_t& i32) override { - return static_cast(this)->readI32(i32); - } - - uint32_t readI64_virt(int64_t& i64) override { - return static_cast(this)->readI64(i64); - } - - uint32_t readDouble_virt(double& dub) override { - return static_cast(this)->readDouble(dub); - } - - uint32_t readString_virt(std::string& str) override { - return static_cast(this)->readString(str); - } - - uint32_t readBinary_virt(std::string& str) override { - return static_cast(this)->readBinary(str); - } - - uint32_t skip_virt(TType type) override { return static_cast(this)->skip(type); } - - /* - * Provide a default skip() implementation that uses non-virtual read - * methods. - * - * Note: subclasses that use TVirtualProtocol to derive from another protocol - * implementation (i.e., not TProtocolDefaults) should beware that this may - * override any non-default skip() implementation provided by the parent - * transport class. They may need to explicitly redefine skip() to call the - * correct parent implementation, if desired. - */ - uint32_t skip(TType type) { - auto* const prot = static_cast(this); - return ::duckdb_apache::thrift::protocol::skip(*prot, type); - } - - /* - * Provide a default readBool() implementation for use with - * std::vector, that behaves the same as reading into a normal bool. - * - * Subclasses can override this if desired, but there normally shouldn't - * be a need to. - */ - uint32_t readBool(std::vector::reference value) { - bool b = false; - uint32_t ret = static_cast(this)->readBool(b); - value = b; - return ret; - } - using Super_::readBool; // so we don't hide readBool(bool&) - -protected: - TVirtualProtocol(std::shared_ptr ptrans) : Super_(ptrans) {} -}; -} -} -} // duckdb_apache::thrift::protocol - -#endif // #define _DUCKDB_THRIFT_PROTOCOL_TVIRTUALPROTOCOL_H_ 1 - - -// LICENSE_CHANGE_END - - -#include -#include - -namespace duckdb_apache { -namespace thrift { -namespace protocol { - -/** - * C++ Implementation of the Compact Protocol as described in THRIFT-110 - */ -template -class TCompactProtocolT : public TVirtualProtocol > { -public: - static const int8_t PROTOCOL_ID = (int8_t)0x82u; - static const int8_t VERSION_N = 1; - static const int8_t VERSION_MASK = 0x1f; // 0001 1111 - -protected: - static const int8_t TYPE_MASK = (int8_t)0xE0u; // 1110 0000 - static const int8_t TYPE_BITS = 0x07; // 0000 0111 - static const int32_t TYPE_SHIFT_AMOUNT = 5; - - Transport_* trans_; - - /** - * (Writing) If we encounter a boolean field begin, save the TField here - * so it can have the value incorporated. - */ - struct { - const char* name; - TType fieldType; - int16_t fieldId; - } booleanField_; - - /** - * (Reading) If we read a field header, and it's a boolean field, save - * the boolean value here so that readBool can use it. - */ - struct { - bool hasBoolValue; - bool boolValue; - } boolValue_; - - /** - * Used to keep track of the last field for the current and previous structs, - * so we can do the delta stuff. - */ - - std::stack lastField_; - int16_t lastFieldId_; - -public: - TCompactProtocolT(std::shared_ptr trans) - : TVirtualProtocol >(trans), - trans_(trans.get()), - lastFieldId_(0), - string_limit_(0), - string_buf_(nullptr), - string_buf_size_(0), - container_limit_(0) { - booleanField_.name = nullptr; - boolValue_.hasBoolValue = false; - } - - TCompactProtocolT(std::shared_ptr trans, - int32_t string_limit, - int32_t container_limit) - : TVirtualProtocol >(trans), - trans_(trans.get()), - lastFieldId_(0), - string_limit_(string_limit), - string_buf_(nullptr), - string_buf_size_(0), - container_limit_(container_limit) { - booleanField_.name = nullptr; - boolValue_.hasBoolValue = false; - } - - ~TCompactProtocolT() override { free(string_buf_); } - - /** - * Writing functions - */ - - virtual uint32_t writeMessageBegin(const std::string& name, - const TMessageType messageType, - const int32_t seqid); - - uint32_t writeStructBegin(const char* name); - - uint32_t writeStructEnd(); - - uint32_t writeFieldBegin(const char* name, const TType fieldType, const int16_t fieldId); - - uint32_t writeFieldStop(); - - uint32_t writeListBegin(const TType elemType, const uint32_t size); - - uint32_t writeSetBegin(const TType elemType, const uint32_t size); - - virtual uint32_t writeMapBegin(const TType keyType, const TType valType, const uint32_t size); - - uint32_t writeBool(const bool value); - - uint32_t writeByte(const int8_t byte); - - uint32_t writeI16(const int16_t i16); - - uint32_t writeI32(const int32_t i32); - - uint32_t writeI64(const int64_t i64); - - uint32_t writeDouble(const double dub); - - uint32_t writeString(const std::string& str); - - uint32_t writeBinary(const std::string& str); - - /** - * These methods are called by structs, but don't actually have any wired - * output or purpose - */ - virtual uint32_t writeMessageEnd() { return 0; } - uint32_t writeMapEnd() { return 0; } - uint32_t writeListEnd() { return 0; } - uint32_t writeSetEnd() { return 0; } - uint32_t writeFieldEnd() { return 0; } - -protected: - int32_t writeFieldBeginInternal(const char* name, - const TType fieldType, - const int16_t fieldId, - int8_t typeOverride); - uint32_t writeCollectionBegin(const TType elemType, int32_t size); - uint32_t writeVarint32(uint32_t n); - uint32_t writeVarint64(uint64_t n); - uint64_t i64ToZigzag(const int64_t l); - uint32_t i32ToZigzag(const int32_t n); - inline int8_t getCompactType(const TType ttype); - -public: - uint32_t readMessageBegin(std::string& name, TMessageType& messageType, int32_t& seqid); - - uint32_t readStructBegin(std::string& name); - - uint32_t readStructEnd(); - - uint32_t readFieldBegin(std::string& name, TType& fieldType, int16_t& fieldId); - - uint32_t readMapBegin(TType& keyType, TType& valType, uint32_t& size); - - uint32_t readListBegin(TType& elemType, uint32_t& size); - - uint32_t readSetBegin(TType& elemType, uint32_t& size); - - uint32_t readBool(bool& value); - // Provide the default readBool() implementation for std::vector - using TVirtualProtocol >::readBool; - - uint32_t readByte(int8_t& byte); - - uint32_t readI16(int16_t& i16); - - uint32_t readI32(int32_t& i32); - - uint32_t readI64(int64_t& i64); - - uint32_t readDouble(double& dub); - - uint32_t readString(std::string& str); - - uint32_t readBinary(std::string& str); - - /* - *These methods are here for the struct to call, but don't have any wire - * encoding. - */ - uint32_t readMessageEnd() { return 0; } - uint32_t readFieldEnd() { return 0; } - uint32_t readMapEnd() { return 0; } - uint32_t readListEnd() { return 0; } - uint32_t readSetEnd() { return 0; } - -protected: - uint32_t readVarint32(int32_t& i32); - uint32_t readVarint64(int64_t& i64); - int32_t zigzagToI32(uint32_t n); - int64_t zigzagToI64(uint64_t n); - TType getTType(int8_t type); - - // Buffer for reading strings, save for the lifetime of the protocol to - // avoid memory churn allocating memory on every string read - int32_t string_limit_; - uint8_t* string_buf_; - int32_t string_buf_size_; - int32_t container_limit_; -}; - -typedef TCompactProtocolT TCompactProtocol; - -/** - * Constructs compact protocol handlers - */ -template -class TCompactProtocolFactoryT : public TProtocolFactory { -public: - TCompactProtocolFactoryT() : string_limit_(0), container_limit_(0) {} - - TCompactProtocolFactoryT(int32_t string_limit, int32_t container_limit) - : string_limit_(string_limit), container_limit_(container_limit) {} - - ~TCompactProtocolFactoryT() override = default; - - void setStringSizeLimit(int32_t string_limit) { string_limit_ = string_limit; } - - void setContainerSizeLimit(int32_t container_limit) { container_limit_ = container_limit; } - - std::shared_ptr getProtocol(std::shared_ptr trans) override { - std::shared_ptr specific_trans = std::static_pointer_cast(trans); - TProtocol* prot; - if (specific_trans) { - prot = new TCompactProtocolT(specific_trans, string_limit_, container_limit_); - } else { - prot = new TCompactProtocol(trans, string_limit_, container_limit_); - } - - return std::shared_ptr(prot); - } - -private: - int32_t string_limit_; - int32_t container_limit_; -}; - -typedef TCompactProtocolFactoryT TCompactProtocolFactory; -} -} -} // duckdb_apache::thrift::protocol - - - -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #2 -// See the end of this file for a list - -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -#ifndef _DUCKDB_THRIFT_PROTOCOL_TCOMPACTPROTOCOL_TCC_ -#define _DUCKDB_THRIFT_PROTOCOL_TCOMPACTPROTOCOL_TCC_ 1 - -#include - -/* - * TCompactProtocol::i*ToZigzag depend on the fact that the right shift - * operator on a signed integer is an arithmetic (sign-extending) shift. - * If this is not the case, the current implementation will not work. - * If anyone encounters this error, we can try to figure out the best - * way to implement an arithmetic right shift on their platform. - */ -#if !defined(SIGNED_RIGHT_SHIFT_IS) || !defined(ARITHMETIC_RIGHT_SHIFT) -# error "Unable to determine the behavior of a signed right shift" -#endif -#if SIGNED_RIGHT_SHIFT_IS != ARITHMETIC_RIGHT_SHIFT -# error "TCompactProtocol currently only works if a signed right shift is arithmetic" -#endif - -#ifndef UNLIKELY -#ifdef __GNUC__ -#define UNLIKELY(val) (__builtin_expect((val), 0)) -#else -#define UNLIKELY(val) (val) -#endif -#endif - -namespace duckdb_apache { namespace thrift { namespace protocol { - -namespace detail { namespace compact { - -enum Types { - CT_STOP = 0x00, - CT_BOOLEAN_TRUE = 0x01, - CT_BOOLEAN_FALSE = 0x02, - CT_BYTE = 0x03, - CT_I16 = 0x04, - CT_I32 = 0x05, - CT_I64 = 0x06, - CT_DOUBLE = 0x07, - CT_BINARY = 0x08, - CT_LIST = 0x09, - CT_SET = 0x0A, - CT_MAP = 0x0B, - CT_STRUCT = 0x0C -}; - -const int8_t TTypeToCType[16] = { - CT_STOP, // T_STOP - 0, // unused - CT_BOOLEAN_TRUE, // T_BOOL - CT_BYTE, // T_BYTE - CT_DOUBLE, // T_DOUBLE - 0, // unused - CT_I16, // T_I16 - 0, // unused - CT_I32, // T_I32 - 0, // unused - CT_I64, // T_I64 - CT_BINARY, // T_STRING - CT_STRUCT, // T_STRUCT - CT_MAP, // T_MAP - CT_SET, // T_SET - CT_LIST, // T_LIST -}; - -}} // end detail::compact namespace - - -template -uint32_t TCompactProtocolT::writeMessageBegin( - const std::string& name, - const TMessageType messageType, - const int32_t seqid) { - uint32_t wsize = 0; - wsize += writeByte(PROTOCOL_ID); - wsize += writeByte((VERSION_N & VERSION_MASK) | (((int32_t)messageType << TYPE_SHIFT_AMOUNT) & TYPE_MASK)); - wsize += writeVarint32(seqid); - wsize += writeString(name); - return wsize; -} - -/** - * Write a field header containing the field id and field type. If the - * difference between the current field id and the last one is small (< 15), - * then the field id will be encoded in the 4 MSB as a delta. Otherwise, the - * field id will follow the type header as a zigzag varint. - */ -template -uint32_t TCompactProtocolT::writeFieldBegin(const char* name, - const TType fieldType, - const int16_t fieldId) { - if (fieldType == T_BOOL) { - booleanField_.name = name; - booleanField_.fieldType = fieldType; - booleanField_.fieldId = fieldId; - } else { - return writeFieldBeginInternal(name, fieldType, fieldId, -1); - } - return 0; -} - -/** - * Write the STOP symbol so we know there are no more fields in this struct. - */ -template -uint32_t TCompactProtocolT::writeFieldStop() { - return writeByte(T_STOP); -} - -/** - * Write a struct begin. This doesn't actually put anything on the wire. We - * use it as an opportunity to put special placeholder markers on the field - * stack so we can get the field id deltas correct. - */ -template -uint32_t TCompactProtocolT::writeStructBegin(const char* name) { - (void) name; - lastField_.push(lastFieldId_); - lastFieldId_ = 0; - return 0; -} - -/** - * Write a struct end. This doesn't actually put anything on the wire. We use - * this as an opportunity to pop the last field from the current struct off - * of the field stack. - */ -template -uint32_t TCompactProtocolT::writeStructEnd() { - lastFieldId_ = lastField_.top(); - lastField_.pop(); - return 0; -} - -/** - * Write a List header. - */ -template -uint32_t TCompactProtocolT::writeListBegin(const TType elemType, - const uint32_t size) { - return writeCollectionBegin(elemType, size); -} - -/** - * Write a set header. - */ -template -uint32_t TCompactProtocolT::writeSetBegin(const TType elemType, - const uint32_t size) { - return writeCollectionBegin(elemType, size); -} - -/** - * Write a map header. If the map is empty, omit the key and value type - * headers, as we don't need any additional information to skip it. - */ -template -uint32_t TCompactProtocolT::writeMapBegin(const TType keyType, - const TType valType, - const uint32_t size) { - uint32_t wsize = 0; - - if (size == 0) { - wsize += writeByte(0); - } else { - wsize += writeVarint32(size); - wsize += writeByte(getCompactType(keyType) << 4 | getCompactType(valType)); - } - return wsize; -} - -/** - * Write a boolean value. Potentially, this could be a boolean field, in - * which case the field header info isn't written yet. If so, decide what the - * right type header is for the value and then write the field header. - * Otherwise, write a single byte. - */ -template -uint32_t TCompactProtocolT::writeBool(const bool value) { - uint32_t wsize = 0; - - if (booleanField_.name != NULL) { - // we haven't written the field header yet - wsize - += writeFieldBeginInternal(booleanField_.name, - booleanField_.fieldType, - booleanField_.fieldId, - static_cast(value - ? detail::compact::CT_BOOLEAN_TRUE - : detail::compact::CT_BOOLEAN_FALSE)); - booleanField_.name = NULL; - } else { - // we're not part of a field, so just write the value - wsize - += writeByte(static_cast(value - ? detail::compact::CT_BOOLEAN_TRUE - : detail::compact::CT_BOOLEAN_FALSE)); - } - return wsize; -} - -template -uint32_t TCompactProtocolT::writeByte(const int8_t byte) { - trans_->write((uint8_t*)&byte, 1); - return 1; -} - -/** - * Write an i16 as a zigzag varint. - */ -template -uint32_t TCompactProtocolT::writeI16(const int16_t i16) { - return writeVarint32(i32ToZigzag(i16)); -} - -/** - * Write an i32 as a zigzag varint. - */ -template -uint32_t TCompactProtocolT::writeI32(const int32_t i32) { - return writeVarint32(i32ToZigzag(i32)); -} - -/** - * Write an i64 as a zigzag varint. - */ -template -uint32_t TCompactProtocolT::writeI64(const int64_t i64) { - return writeVarint64(i64ToZigzag(i64)); -} - -/** - * Write a double to the wire as 8 bytes. - */ -template -uint32_t TCompactProtocolT::writeDouble(const double dub) { - //BOOST_STATIC_ASSERT(sizeof(double) == sizeof(uint64_t)); - //BOOST_STATIC_ASSERT(std::numeric_limits::is_iec559); - - uint64_t bits = bitwise_cast(dub); - bits = THRIFT_htolell(bits); - trans_->write((uint8_t*)&bits, 8); - return 8; -} - -/** - * Write a string to the wire with a varint size preceding. - */ -template -uint32_t TCompactProtocolT::writeString(const std::string& str) { - return writeBinary(str); -} - -template -uint32_t TCompactProtocolT::writeBinary(const std::string& str) { - if(str.size() > (std::numeric_limits::max)()) - throw TProtocolException(TProtocolException::SIZE_LIMIT); - uint32_t ssize = static_cast(str.size()); - uint32_t wsize = writeVarint32(ssize) ; - // checking ssize + wsize > uint_max, but we don't want to overflow while checking for overflows. - // transforming the check to ssize > uint_max - wsize - if(ssize > (std::numeric_limits::max)() - wsize) - throw TProtocolException(TProtocolException::SIZE_LIMIT); - wsize += ssize; - trans_->write((uint8_t*)str.data(), ssize); - return wsize; -} - -// -// Internal Writing methods -// - -/** - * The workhorse of writeFieldBegin. It has the option of doing a - * 'type override' of the type header. This is used specifically in the - * boolean field case. - */ -template -int32_t TCompactProtocolT::writeFieldBeginInternal( - const char* name, - const TType fieldType, - const int16_t fieldId, - int8_t typeOverride) { - (void) name; - uint32_t wsize = 0; - - // if there's a type override, use that. - int8_t typeToWrite = (typeOverride == -1 ? getCompactType(fieldType) : typeOverride); - - // check if we can use delta encoding for the field id - if (fieldId > lastFieldId_ && fieldId - lastFieldId_ <= 15) { - // write them together - wsize += writeByte(static_cast((fieldId - lastFieldId_) - << 4 | typeToWrite)); - } else { - // write them separate - wsize += writeByte(typeToWrite); - wsize += writeI16(fieldId); - } - - lastFieldId_ = fieldId; - return wsize; -} - -/** - * Abstract method for writing the start of lists and sets. List and sets on - * the wire differ only by the type indicator. - */ -template -uint32_t TCompactProtocolT::writeCollectionBegin(const TType elemType, - int32_t size) { - uint32_t wsize = 0; - if (size <= 14) { - wsize += writeByte(static_cast(size - << 4 | getCompactType(elemType))); - } else { - wsize += writeByte(0xf0 | getCompactType(elemType)); - wsize += writeVarint32(size); - } - return wsize; -} - -/** - * Write an i32 as a varint. Results in 1-5 bytes on the wire. - */ -template -uint32_t TCompactProtocolT::writeVarint32(uint32_t n) { - uint8_t buf[5]; - uint32_t wsize = 0; - - while (true) { - if ((n & ~0x7F) == 0) { - buf[wsize++] = (int8_t)n; - break; - } else { - buf[wsize++] = (int8_t)((n & 0x7F) | 0x80); - n >>= 7; - } - } - trans_->write(buf, wsize); - return wsize; -} - -/** - * Write an i64 as a varint. Results in 1-10 bytes on the wire. - */ -template -uint32_t TCompactProtocolT::writeVarint64(uint64_t n) { - uint8_t buf[10]; - uint32_t wsize = 0; - - while (true) { - if ((n & ~0x7FL) == 0) { - buf[wsize++] = (int8_t)n; - break; - } else { - buf[wsize++] = (int8_t)((n & 0x7F) | 0x80); - n >>= 7; - } - } - trans_->write(buf, wsize); - return wsize; -} - -/** - * Convert l into a zigzag long. This allows negative numbers to be - * represented compactly as a varint. - */ -template -uint64_t TCompactProtocolT::i64ToZigzag(const int64_t l) { - return (static_cast(l) << 1) ^ (l >> 63); -} - -/** - * Convert n into a zigzag int. This allows negative numbers to be - * represented compactly as a varint. - */ -template -uint32_t TCompactProtocolT::i32ToZigzag(const int32_t n) { - return (static_cast(n) << 1) ^ (n >> 31); -} - -/** - * Given a TType value, find the appropriate detail::compact::Types value - */ -template -int8_t TCompactProtocolT::getCompactType(const TType ttype) { - return detail::compact::TTypeToCType[ttype]; -} - -// -// Reading Methods -// - -/** - * Read a message header. - */ -template -uint32_t TCompactProtocolT::readMessageBegin( - std::string& name, - TMessageType& messageType, - int32_t& seqid) { - uint32_t rsize = 0; - int8_t protocolId; - int8_t versionAndType; - int8_t version; - - rsize += readByte(protocolId); - if (protocolId != PROTOCOL_ID) { - throw TProtocolException(TProtocolException::BAD_VERSION, "Bad protocol identifier"); - } - - rsize += readByte(versionAndType); - version = (int8_t)(versionAndType & VERSION_MASK); - if (version != VERSION_N) { - throw TProtocolException(TProtocolException::BAD_VERSION, "Bad protocol version"); - } - - messageType = (TMessageType)((versionAndType >> TYPE_SHIFT_AMOUNT) & TYPE_BITS); - rsize += readVarint32(seqid); - rsize += readString(name); - - return rsize; -} - -/** - * Read a struct begin. There's nothing on the wire for this, but it is our - * opportunity to push a new struct begin marker on the field stack. - */ -template -uint32_t TCompactProtocolT::readStructBegin(std::string& name) { - name = ""; - lastField_.push(lastFieldId_); - lastFieldId_ = 0; - return 0; -} - -/** - * Doesn't actually consume any wire data, just removes the last field for - * this struct from the field stack. - */ -template -uint32_t TCompactProtocolT::readStructEnd() { - lastFieldId_ = lastField_.top(); - lastField_.pop(); - return 0; -} - -/** - * Read a field header off the wire. - */ -template -uint32_t TCompactProtocolT::readFieldBegin(std::string& name, - TType& fieldType, - int16_t& fieldId) { - (void) name; - uint32_t rsize = 0; - int8_t byte; - int8_t type; - - rsize += readByte(byte); - type = (byte & 0x0f); - - // if it's a stop, then we can return immediately, as the struct is over. - if (type == T_STOP) { - fieldType = T_STOP; - fieldId = 0; - return rsize; - } - - // mask off the 4 MSB of the type header. it could contain a field id delta. - int16_t modifier = (int16_t)(((uint8_t)byte & 0xf0) >> 4); - if (modifier == 0) { - // not a delta, look ahead for the zigzag varint field id. - rsize += readI16(fieldId); - } else { - fieldId = (int16_t)(lastFieldId_ + modifier); - } - fieldType = getTType(type); - - // if this happens to be a boolean field, the value is encoded in the type - if (type == detail::compact::CT_BOOLEAN_TRUE || - type == detail::compact::CT_BOOLEAN_FALSE) { - // save the boolean value in a special instance variable. - boolValue_.hasBoolValue = true; - boolValue_.boolValue = - (type == detail::compact::CT_BOOLEAN_TRUE ? true : false); - } - - // push the new field onto the field stack so we can keep the deltas going. - lastFieldId_ = fieldId; - return rsize; -} - -/** - * Read a map header off the wire. If the size is zero, skip reading the key - * and value type. This means that 0-length maps will yield TMaps without the - * "correct" types. - */ -template -uint32_t TCompactProtocolT::readMapBegin(TType& keyType, - TType& valType, - uint32_t& size) { - uint32_t rsize = 0; - int8_t kvType = 0; - int32_t msize = 0; - - rsize += readVarint32(msize); - if (msize != 0) - rsize += readByte(kvType); - - if (msize < 0) { - throw TProtocolException(TProtocolException::NEGATIVE_SIZE); - } else if (container_limit_ && msize > container_limit_) { - throw TProtocolException(TProtocolException::SIZE_LIMIT); - } - - keyType = getTType((int8_t)((uint8_t)kvType >> 4)); - valType = getTType((int8_t)((uint8_t)kvType & 0xf)); - size = (uint32_t)msize; - - return rsize; -} - -/** - * Read a list header off the wire. If the list size is 0-14, the size will - * be packed into the element type header. If it's a longer list, the 4 MSB - * of the element type header will be 0xF, and a varint will follow with the - * true size. - */ -template -uint32_t TCompactProtocolT::readListBegin(TType& elemType, - uint32_t& size) { - int8_t size_and_type; - uint32_t rsize = 0; - int32_t lsize; - - rsize += readByte(size_and_type); - - lsize = ((uint8_t)size_and_type >> 4) & 0x0f; - if (lsize == 15) { - rsize += readVarint32(lsize); - } - - if (lsize < 0) { - throw TProtocolException(TProtocolException::NEGATIVE_SIZE); - } else if (container_limit_ && lsize > container_limit_) { - throw TProtocolException(TProtocolException::SIZE_LIMIT); - } - - elemType = getTType((int8_t)(size_and_type & 0x0f)); - size = (uint32_t)lsize; - - return rsize; -} - -/** - * Read a set header off the wire. If the set size is 0-14, the size will - * be packed into the element type header. If it's a longer set, the 4 MSB - * of the element type header will be 0xF, and a varint will follow with the - * true size. - */ -template -uint32_t TCompactProtocolT::readSetBegin(TType& elemType, - uint32_t& size) { - return readListBegin(elemType, size); -} - -/** - * Read a boolean off the wire. If this is a boolean field, the value should - * already have been read during readFieldBegin, so we'll just consume the - * pre-stored value. Otherwise, read a byte. - */ -template -uint32_t TCompactProtocolT::readBool(bool& value) { - if (boolValue_.hasBoolValue == true) { - value = boolValue_.boolValue; - boolValue_.hasBoolValue = false; - return 0; - } else { - int8_t val; - readByte(val); - value = (val == detail::compact::CT_BOOLEAN_TRUE); - return 1; - } -} - -/** - * Read a single byte off the wire. Nothing interesting here. - */ -template -uint32_t TCompactProtocolT::readByte(int8_t& byte) { - uint8_t b[1]; - trans_->readAll(b, 1); - byte = *(int8_t*)b; - return 1; -} - -/** - * Read an i16 from the wire as a zigzag varint. - */ -template -uint32_t TCompactProtocolT::readI16(int16_t& i16) { - int32_t value; - uint32_t rsize = readVarint32(value); - i16 = (int16_t)zigzagToI32(value); - return rsize; -} - -/** - * Read an i32 from the wire as a zigzag varint. - */ -template -uint32_t TCompactProtocolT::readI32(int32_t& i32) { - int32_t value; - uint32_t rsize = readVarint32(value); - i32 = zigzagToI32(value); - return rsize; -} - -/** - * Read an i64 from the wire as a zigzag varint. - */ -template -uint32_t TCompactProtocolT::readI64(int64_t& i64) { - int64_t value; - uint32_t rsize = readVarint64(value); - i64 = zigzagToI64(value); - return rsize; -} - -/** - * No magic here - just read a double off the wire. - */ -template -uint32_t TCompactProtocolT::readDouble(double& dub) { - //BOOST_STATIC_ASSERT(sizeof(double) == sizeof(uint64_t)); - //BOOST_STATIC_ASSERT(std::numeric_limits::is_iec559); - - union { - uint64_t bits; - uint8_t b[8]; - } u; - trans_->readAll(u.b, 8); - u.bits = THRIFT_letohll(u.bits); - dub = bitwise_cast(u.bits); - return 8; -} - -template -uint32_t TCompactProtocolT::readString(std::string& str) { - return readBinary(str); -} - -/** - * Read a byte[] from the wire. - */ -template -uint32_t TCompactProtocolT::readBinary(std::string& str) { - int32_t rsize = 0; - int32_t size; - - rsize += readVarint32(size); - // Catch empty string case - if (size == 0) { - str = ""; - return rsize; - } - - // Catch error cases - if (size < 0) { - throw TProtocolException(TProtocolException::NEGATIVE_SIZE); - } - if (string_limit_ > 0 && size > string_limit_) { - throw TProtocolException(TProtocolException::SIZE_LIMIT); - } - - // Use the heap here to prevent stack overflow for v. large strings - if (size > string_buf_size_ || string_buf_ == NULL) { - void* new_string_buf = std::realloc(string_buf_, (uint32_t)size); - if (new_string_buf == NULL) { - throw std::bad_alloc(); - } - string_buf_ = (uint8_t*)new_string_buf; - string_buf_size_ = size; - } - trans_->readAll(string_buf_, size); - str.assign((char*)string_buf_, size); - - return rsize + (uint32_t)size; -} - -/** - * Read an i32 from the wire as a varint. The MSB of each byte is set - * if there is another byte to follow. This can read up to 5 bytes. - */ -template -uint32_t TCompactProtocolT::readVarint32(int32_t& i32) { - int64_t val; - uint32_t rsize = readVarint64(val); - i32 = (int32_t)val; - return rsize; -} - -/** - * Read an i64 from the wire as a proper varint. The MSB of each byte is set - * if there is another byte to follow. This can read up to 10 bytes. - */ -template -uint32_t TCompactProtocolT::readVarint64(int64_t& i64) { - uint32_t rsize = 0; - uint64_t val = 0; - int shift = 0; - uint8_t buf[10]; // 64 bits / (7 bits/byte) = 10 bytes. - uint32_t buf_size = sizeof(buf); - const uint8_t* borrowed = trans_->borrow(buf, &buf_size); - - // Fast path. - if (borrowed != NULL) { - while (true) { - uint8_t byte = borrowed[rsize]; - rsize++; - val |= (uint64_t)(byte & 0x7f) << shift; - shift += 7; - if (!(byte & 0x80)) { - i64 = val; - trans_->consume(rsize); - return rsize; - } - // Have to check for invalid data so we don't crash. - if (UNLIKELY(rsize == sizeof(buf))) { - throw TProtocolException(TProtocolException::INVALID_DATA, "Variable-length int over 10 bytes."); - } - } - } - - // Slow path. - else { - while (true) { - uint8_t byte; - rsize += trans_->readAll(&byte, 1); - val |= (uint64_t)(byte & 0x7f) << shift; - shift += 7; - if (!(byte & 0x80)) { - i64 = val; - return rsize; - } - // Might as well check for invalid data on the slow path too. - if (UNLIKELY(rsize >= sizeof(buf))) { - throw TProtocolException(TProtocolException::INVALID_DATA, "Variable-length int over 10 bytes."); - } - } - } -} - -/** - * Convert from zigzag int to int. - */ -template -int32_t TCompactProtocolT::zigzagToI32(uint32_t n) { - return (n >> 1) ^ static_cast(-static_cast(n & 1)); -} - -/** - * Convert from zigzag long to long. - */ -template -int64_t TCompactProtocolT::zigzagToI64(uint64_t n) { - return (n >> 1) ^ static_cast(-static_cast(n & 1)); -} - -template -TType TCompactProtocolT::getTType(int8_t type) { - switch (type) { - case T_STOP: - return T_STOP; - case detail::compact::CT_BOOLEAN_FALSE: - case detail::compact::CT_BOOLEAN_TRUE: - return T_BOOL; - case detail::compact::CT_BYTE: - return T_BYTE; - case detail::compact::CT_I16: - return T_I16; - case detail::compact::CT_I32: - return T_I32; - case detail::compact::CT_I64: - return T_I64; - case detail::compact::CT_DOUBLE: - return T_DOUBLE; - case detail::compact::CT_BINARY: - return T_STRING; - case detail::compact::CT_LIST: - return T_LIST; - case detail::compact::CT_SET: - return T_SET; - case detail::compact::CT_MAP: - return T_MAP; - case detail::compact::CT_STRUCT: - return T_STRUCT; - default: - throw TException(std::string("don't know what type: ") + (char)type); - } -} - -}}} // duckdb_apache::thrift::protocol - -#endif // _DUCKDB_THRIFT_PROTOCOL_TCOMPACTPROTOCOL_TCC_ - - -// LICENSE_CHANGE_END - - -#endif - - -// LICENSE_CHANGE_END - - - -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #2 -// See the end of this file for a list - -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -#ifndef _DUCKDB_THRIFT_TRANSPORT_TBUFFERTRANSPORTS_H_ -#define _DUCKDB_THRIFT_TRANSPORT_TBUFFERTRANSPORTS_H_ 1 - -#include -#include -#include -#include -//#include // FUCK OFF - - - - -// LICENSE_CHANGE_BEGIN -// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #2 -// See the end of this file for a list - -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -#ifndef _DUCKDB_THRIFT_TRANSPORT_TVIRTUALTRANSPORT_H_ -#define _DUCKDB_THRIFT_TRANSPORT_TVIRTUALTRANSPORT_H_ 1 - - - -namespace duckdb_apache { -namespace thrift { -namespace transport { - -/** - * Helper class that provides default implementations of TTransport methods. - * - * This class provides default implementations of read(), readAll(), write(), - * borrow() and consume(). - * - * In the TTransport base class, each of these methods simply invokes its - * virtual counterpart. This class overrides them to always perform the - * default behavior, without a virtual function call. - * - * The primary purpose of this class is to serve as a base class for - * TVirtualTransport, and prevent infinite recursion if one of its subclasses - * does not override the TTransport implementation of these methods. (Since - * TVirtualTransport::read_virt() calls read(), and TTransport::read() calls - * read_virt().) - */ -class TTransportDefaults : public TTransport { -public: - /* - * TTransport *_virt() methods provide reasonable default implementations. - * Invoke them non-virtually. - */ - uint32_t read(uint8_t* buf, uint32_t len) { return this->TTransport::read_virt(buf, len); } - uint32_t readAll(uint8_t* buf, uint32_t len) { return this->TTransport::readAll_virt(buf, len); } - void write(const uint8_t* buf, uint32_t len) { this->TTransport::write_virt(buf, len); } - const uint8_t* borrow(uint8_t* buf, uint32_t* len) { - return this->TTransport::borrow_virt(buf, len); - } - void consume(uint32_t len) { this->TTransport::consume_virt(len); } - -protected: - TTransportDefaults() = default; -}; - -/** - * Helper class to provide polymorphism for subclasses of TTransport. - * - * This class implements *_virt() methods of TTransport, to call the - * non-virtual versions of these functions in the proper subclass. - * - * To define your own transport class using TVirtualTransport: - * 1) Derive your subclass from TVirtualTransport - * e.g: class MyTransport : public TVirtualTransport { - * 2) Provide your own implementations of read(), readAll(), etc. - * These methods should be non-virtual. - * - * Transport implementations that need to use virtual inheritance when - * inheriting from TTransport cannot use TVirtualTransport. - * - * @author Chad Walters - */ -template -class TVirtualTransport : public Super_ { -public: - /* - * Implementations of the *_virt() functions, to call the subclass's - * non-virtual implementation function. - */ - uint32_t read_virt(uint8_t* buf, uint32_t len) override { - return static_cast(this)->read(buf, len); - } - - uint32_t readAll_virt(uint8_t* buf, uint32_t len) override { - return static_cast(this)->readAll(buf, len); - } - - void write_virt(const uint8_t* buf, uint32_t len) override { - static_cast(this)->write(buf, len); - } - - const uint8_t* borrow_virt(uint8_t* buf, uint32_t* len) override { - return static_cast(this)->borrow(buf, len); - } - - void consume_virt(uint32_t len) override { static_cast(this)->consume(len); } - - /* - * Provide a default readAll() implementation that invokes - * read() non-virtually. - * - * Note: subclasses that use TVirtualTransport to derive from another - * transport implementation (i.e., not TTransportDefaults) should beware that - * this may override any non-default readAll() implementation provided by - * the parent transport class. They may need to redefine readAll() to call - * the correct parent implementation, if desired. - */ - uint32_t readAll(uint8_t* buf, uint32_t len) { - auto* trans = static_cast(this); - return ::duckdb_apache::thrift::transport::readAll(*trans, buf, len); - } - -protected: - TVirtualTransport() = default; - - /* - * Templatized constructors, to allow arguments to be passed to the Super_ - * constructor. Currently we only support 0, 1, or 2 arguments, but - * additional versions can be added as needed. - */ - template - TVirtualTransport(Arg_ const& arg) - : Super_(arg) {} - - template - TVirtualTransport(Arg1_ const& a1, Arg2_ const& a2) - : Super_(a1, a2) {} -}; -} -} -} // duckdb_apache::thrift::transport - -#endif // #ifndef _DUCKDB_THRIFT_TRANSPORT_TVIRTUALTRANSPORT_H_ - - -// LICENSE_CHANGE_END - - -#ifdef __GNUC__ -#define TDB_LIKELY(val) (__builtin_expect((val), 1)) -#define TDB_UNLIKELY(val) (__builtin_expect((val), 0)) -#else -#define TDB_LIKELY(val) (val) -#define TDB_UNLIKELY(val) (val) -#endif - -namespace duckdb_apache { -namespace thrift { -namespace transport { - -/** - * Base class for all transports that use read/write buffers for performance. - * - * TBufferBase is designed to implement the fast-path "memcpy" style - * operations that work in the common case. It does so with small and - * (eventually) nonvirtual, inlinable methods. TBufferBase is an abstract - * class. Subclasses are expected to define the "slow path" operations - * that have to be done when the buffers are full or empty. - * - */ -class TBufferBase : public TVirtualTransport { - -public: - /** - * Fast-path read. - * - * When we have enough data buffered to fulfill the read, we can satisfy it - * with a single memcpy, then adjust our internal pointers. If the buffer - * is empty, we call out to our slow path, implemented by a subclass. - * This method is meant to eventually be nonvirtual and inlinable. - */ - uint32_t read(uint8_t* buf, uint32_t len) { - uint8_t* new_rBase = rBase_ + len; - if (TDB_LIKELY(new_rBase <= rBound_)) { - std::memcpy(buf, rBase_, len); - rBase_ = new_rBase; - return len; - } - return readSlow(buf, len); - } - - /** - * Shortcutted version of readAll. - */ - uint32_t readAll(uint8_t* buf, uint32_t len) { - uint8_t* new_rBase = rBase_ + len; - if (TDB_LIKELY(new_rBase <= rBound_)) { - std::memcpy(buf, rBase_, len); - rBase_ = new_rBase; - return len; - } - return duckdb_apache::thrift::transport::readAll(*this, buf, len); - } - - /** - * Fast-path write. - * - * When we have enough empty space in our buffer to accommodate the write, we - * can satisfy it with a single memcpy, then adjust our internal pointers. - * If the buffer is full, we call out to our slow path, implemented by a - * subclass. This method is meant to eventually be nonvirtual and - * inlinable. - */ - void write(const uint8_t* buf, uint32_t len) { - uint8_t* new_wBase = wBase_ + len; - if (TDB_LIKELY(new_wBase <= wBound_)) { - std::memcpy(wBase_, buf, len); - wBase_ = new_wBase; - return; - } - writeSlow(buf, len); - } - - /** - * Fast-path borrow. A lot like the fast-path read. - */ - const uint8_t* borrow(uint8_t* buf, uint32_t* len) { - if (TDB_LIKELY(static_cast(*len) <= rBound_ - rBase_)) { - // With strict aliasing, writing to len shouldn't force us to - // refetch rBase_ from memory. TODO(dreiss): Verify this. - *len = static_cast(rBound_ - rBase_); - return rBase_; - } - return borrowSlow(buf, len); - } - - /** - * Consume doesn't require a slow path. - */ - void consume(uint32_t len) { - if (TDB_LIKELY(static_cast(len) <= rBound_ - rBase_)) { - rBase_ += len; - } else { - throw TTransportException(TTransportException::BAD_ARGS, "consume did not follow a borrow."); - } - } - -protected: - /// Slow path read. - virtual uint32_t readSlow(uint8_t* buf, uint32_t len) = 0; - - /// Slow path write. - virtual void writeSlow(const uint8_t* buf, uint32_t len) = 0; - - /** - * Slow path borrow. - * - * POSTCONDITION: return == NULL || rBound_ - rBase_ >= *len - */ - virtual const uint8_t* borrowSlow(uint8_t* buf, uint32_t* len) = 0; - - /** - * Trivial constructor. - * - * Initialize pointers safely. Constructing is not a very - * performance-sensitive operation, so it is okay to just leave it to - * the concrete class to set up pointers correctly. - */ - TBufferBase() : rBase_(nullptr), rBound_(nullptr), wBase_(nullptr), wBound_(nullptr) {} - - /// Convenience mutator for setting the read buffer. - void setReadBuffer(uint8_t* buf, uint32_t len) { - rBase_ = buf; - rBound_ = buf + len; - } - - /// Convenience mutator for setting the write buffer. - void setWriteBuffer(uint8_t* buf, uint32_t len) { - wBase_ = buf; - wBound_ = buf + len; - } - - ~TBufferBase() override = default; - - /// Reads begin here. - uint8_t* rBase_; - /// Reads may extend to just before here. - uint8_t* rBound_; - - /// Writes begin here. - uint8_t* wBase_; - /// Writes may extend to just before here. - uint8_t* wBound_; -}; - - -/** - * A memory buffer is a transport that simply reads from and writes to an - * in memory buffer. Anytime you call write on it, the data is simply placed - * into a buffer, and anytime you call read, data is read from that buffer. - * - * The buffers are allocated using C constructs malloc,realloc, and the size - * doubles as necessary. We've considered using scoped - * - */ -class TMemoryBuffer : public TVirtualTransport { -private: - // Common initialization done by all constructors. - void initCommon(uint8_t* buf, uint32_t size, bool owner, uint32_t wPos) { - - maxBufferSize_ = (std::numeric_limits::max)(); - - if (buf == nullptr && size != 0) { - assert(owner); - buf = (uint8_t*)std::malloc(size); - if (buf == nullptr) { - throw std::bad_alloc(); - } - } - - buffer_ = buf; - bufferSize_ = size; - - rBase_ = buffer_; - rBound_ = buffer_ + wPos; - // TODO(dreiss): Investigate NULL-ing this if !owner. - wBase_ = buffer_ + wPos; - wBound_ = buffer_ + bufferSize_; - - owner_ = owner; - - // rBound_ is really an artifact. In principle, it should always be - // equal to wBase_. We update it in a few places (computeRead, etc.). - } - -public: - static const uint32_t defaultSize = 1024; - - /** - * This enum specifies how a TMemoryBuffer should treat - * memory passed to it via constructors or resetBuffer. - * - * OBSERVE: - * TMemoryBuffer will simply store a pointer to the memory. - * It is the callers responsibility to ensure that the pointer - * remains valid for the lifetime of the TMemoryBuffer, - * and that it is properly cleaned up. - * Note that no data can be written to observed buffers. - * - * COPY: - * TMemoryBuffer will make an internal copy of the buffer. - * The caller has no responsibilities. - * - * TAKE_OWNERSHIP: - * TMemoryBuffer will become the "owner" of the buffer, - * and will be responsible for freeing it. - * The membory must have been allocated with malloc. - */ - enum MemoryPolicy { OBSERVE = 1, COPY = 2, TAKE_OWNERSHIP = 3 }; - - /** - * Construct a TMemoryBuffer with a default-sized buffer, - * owned by the TMemoryBuffer object. - */ - TMemoryBuffer() { initCommon(nullptr, defaultSize, true, 0); } - - /** - * Construct a TMemoryBuffer with a buffer of a specified size, - * owned by the TMemoryBuffer object. - * - * @param sz The initial size of the buffer. - */ - TMemoryBuffer(uint32_t sz) { initCommon(nullptr, sz, true, 0); } - - /** - * Construct a TMemoryBuffer with buf as its initial contents. - * - * @param buf The initial contents of the buffer. - * Note that, while buf is a non-const pointer, - * TMemoryBuffer will not write to it if policy == OBSERVE, - * so it is safe to const_cast(whatever). - * @param sz The size of @c buf. - * @param policy See @link MemoryPolicy @endlink . - */ - TMemoryBuffer(uint8_t* buf, uint32_t sz, MemoryPolicy policy = OBSERVE) { - if (buf == nullptr && sz != 0) { - throw TTransportException(TTransportException::BAD_ARGS, - "TMemoryBuffer given null buffer with non-zero size."); - } - - switch (policy) { - case OBSERVE: - case TAKE_OWNERSHIP: - initCommon(buf, sz, policy == TAKE_OWNERSHIP, sz); - break; - case COPY: - initCommon(nullptr, sz, true, 0); - this->write(buf, sz); - break; - default: - throw TTransportException(TTransportException::BAD_ARGS, - "Invalid MemoryPolicy for TMemoryBuffer"); - } - } - - ~TMemoryBuffer() override { - if (owner_) { - std::free(buffer_); - } - } - - bool isOpen() const override { return true; } - - bool peek() override { return (rBase_ < wBase_); } - - void open() override {} - - void close() override {} - - // TODO(dreiss): Make bufPtr const. - void getBuffer(uint8_t** bufPtr, uint32_t* sz) { - *bufPtr = rBase_; - *sz = static_cast(wBase_ - rBase_); - } - - std::string getBufferAsString() { - if (buffer_ == nullptr) { - return ""; - } - uint8_t* buf; - uint32_t sz; - getBuffer(&buf, &sz); - return std::string((char*)buf, (std::string::size_type)sz); - } - - void appendBufferToString(std::string& str) { - if (buffer_ == nullptr) { - return; - } - uint8_t* buf; - uint32_t sz; - getBuffer(&buf, &sz); - str.append((char*)buf, sz); - } - - void resetBuffer() { - rBase_ = buffer_; - rBound_ = buffer_; - wBase_ = buffer_; - // It isn't safe to write into a buffer we don't own. - if (!owner_) { - wBound_ = wBase_; - bufferSize_ = 0; - } - } - - /// See constructor documentation. - void resetBuffer(uint8_t* buf, uint32_t sz, MemoryPolicy policy = OBSERVE) { - // Use a variant of the copy-and-swap trick for assignment operators. - // This is sub-optimal in terms of performance for two reasons: - // 1/ The constructing and swapping of the (small) values - // in the temporary object takes some time, and is not necessary. - // 2/ If policy == COPY, we allocate the new buffer before - // freeing the old one, precluding the possibility of - // reusing that memory. - // I doubt that either of these problems could be optimized away, - // but the second is probably no a common case, and the first is minor. - // I don't expect resetBuffer to be a common operation, so I'm willing to - // bite the performance bullet to make the method this simple. - - // Construct the new buffer. - TMemoryBuffer new_buffer(buf, sz, policy); - // Move it into ourself. - this->swap(new_buffer); - // Our old self gets destroyed. - } - - /// See constructor documentation. - void resetBuffer(uint32_t sz) { - // Construct the new buffer. - TMemoryBuffer new_buffer(sz); - // Move it into ourself. - this->swap(new_buffer); - // Our old self gets destroyed. - } - - std::string readAsString(uint32_t len) { - std::string str; - (void)readAppendToString(str, len); - return str; - } - - uint32_t readAppendToString(std::string& str, uint32_t len); - - // return number of bytes read - uint32_t readEnd() override { - // This cast should be safe, because buffer_'s size is a uint32_t - auto bytes = static_cast(rBase_ - buffer_); - if (rBase_ == wBase_) { - resetBuffer(); - } - return bytes; - } - - // Return number of bytes written - uint32_t writeEnd() override { - // This cast should be safe, because buffer_'s size is a uint32_t - return static_cast(wBase_ - buffer_); - } - - uint32_t available_read() const { - // Remember, wBase_ is the real rBound_. - return static_cast(wBase_ - rBase_); - } - - uint32_t available_write() const { return static_cast(wBound_ - wBase_); } - - // Returns a pointer to where the client can write data to append to - // the TMemoryBuffer, and ensures the buffer is big enough to accommodate a - // write of the provided length. The returned pointer is very convenient for - // passing to read(), recv(), or similar. You must call wroteBytes() as soon - // as data is written or the buffer will not be aware that data has changed. - uint8_t* getWritePtr(uint32_t len) { - ensureCanWrite(len); - return wBase_; - } - - // Informs the buffer that the client has written 'len' bytes into storage - // that had been provided by getWritePtr(). - void wroteBytes(uint32_t len); - - /* - * TVirtualTransport provides a default implementation of readAll(). - * We want to use the TBufferBase version instead. - */ - uint32_t readAll(uint8_t* buf, uint32_t len) { return TBufferBase::readAll(buf, len); } - - //! \brief Get the current buffer size - //! \returns the current buffer size - uint32_t getBufferSize() const { - return bufferSize_; - } - - //! \brief Get the current maximum buffer size - //! \returns the current maximum buffer size - uint32_t getMaxBufferSize() const { - return maxBufferSize_; - } - - //! \brief Change the maximum buffer size - //! \param[in] maxSize the new maximum buffer size allowed to grow to - //! \throws TTransportException(BAD_ARGS) if maxSize is less than the current buffer size - void setMaxBufferSize(uint32_t maxSize) { - if (maxSize < bufferSize_) { - throw TTransportException(TTransportException::BAD_ARGS, - "Maximum buffer size would be less than current buffer size"); - } - maxBufferSize_ = maxSize; - } - -protected: - void swap(TMemoryBuffer& that) { - using std::swap; - swap(buffer_, that.buffer_); - swap(bufferSize_, that.bufferSize_); - - swap(rBase_, that.rBase_); - swap(rBound_, that.rBound_); - swap(wBase_, that.wBase_); - swap(wBound_, that.wBound_); - - swap(owner_, that.owner_); - } - - // Make sure there's at least 'len' bytes available for writing. - void ensureCanWrite(uint32_t len); - - // Compute the position and available data for reading. - void computeRead(uint32_t len, uint8_t** out_start, uint32_t* out_give); - - uint32_t readSlow(uint8_t* buf, uint32_t len) override; - - void writeSlow(const uint8_t* buf, uint32_t len) override; - - const uint8_t* borrowSlow(uint8_t* buf, uint32_t* len) override; - - // Data buffer - uint8_t* buffer_; - - // Allocated buffer size - uint32_t bufferSize_; - - // Maximum allowed size - uint32_t maxBufferSize_; - - // Is this object the owner of the buffer? - bool owner_; - - // Don't forget to update constrctors, initCommon, and swap if - // you add new members. -}; -} -} -} // duckdb_apache::thrift::transport - -#endif // #ifndef _DUCKDB_THRIFT_TRANSPORT_TBUFFERTRANSPORTS_H_ - - -// LICENSE_CHANGE_END - - -#include "duckdb.hpp" -#ifndef DUCKDB_AMALGAMATION -#include "duckdb/common/file_system.hpp" -#include "duckdb/common/allocator.hpp" -#endif - -namespace duckdb { - -// A ReadHead for prefetching data in a specific range -struct ReadHead { - ReadHead(idx_t location, uint64_t size) : location(location), size(size) {}; - // Hint info - idx_t location; - uint64_t size; - - // Current info - unique_ptr data; - bool data_isset = false; - - idx_t GetEnd() const { - return size + location; - } - - void Allocate(Allocator &allocator) { - data = allocator.Allocate(size); - } -}; - -// Comparator for ReadHeads that are either overlapping, adjacent, or within ALLOW_GAP bytes from each other -struct ReadHeadComparator { - static constexpr uint64_t ALLOW_GAP = 1 << 14; // 16 KiB - bool operator()(const ReadHead *a, const ReadHead *b) const { - auto a_start = a->location; - auto a_end = a->location + a->size; - auto b_start = b->location; - - if (a_end <= NumericLimits::Maximum() - ALLOW_GAP) { - a_end += ALLOW_GAP; - } - - return a_start < b_start && a_end < b_start; - } -}; - -// Two-step read ahead buffer -// 1: register all ranges that will be read, merging ranges that are consecutive -// 2: prefetch all registered ranges -struct ReadAheadBuffer { - ReadAheadBuffer(Allocator &allocator, FileHandle &handle, FileOpener &opener) - : allocator(allocator), handle(handle), file_opener(opener) { - } - - // The list of read heads - std::list read_heads; - // Set for merging consecutive ranges - std::set merge_set; - - Allocator &allocator; - FileHandle &handle; - FileOpener &file_opener; - - idx_t total_size = 0; - - // Add a read head to the prefetching list - void AddReadHead(idx_t pos, uint64_t len, bool merge_buffers = true) { - // Attempt to merge with existing - if (merge_buffers) { - ReadHead new_read_head {pos, len}; - auto lookup_set = merge_set.find(&new_read_head); - if (lookup_set != merge_set.end()) { - auto existing_head = *lookup_set; - auto new_start = MinValue(existing_head->location, new_read_head.location); - auto new_length = MaxValue(existing_head->GetEnd(), new_read_head.GetEnd()) - new_start; - existing_head->location = new_start; - existing_head->size = new_length; - return; - } - } - - read_heads.emplace_front(ReadHead(pos, len)); - total_size += len; - auto &read_head = read_heads.front(); - - if (merge_buffers) { - merge_set.insert(&read_head); - } - - if (read_head.GetEnd() > handle.GetFileSize()) { - throw std::runtime_error("Prefetch registered for bytes outside file"); - } - } - - // Returns the relevant read head - ReadHead *GetReadHead(idx_t pos) { - for (auto &read_head : read_heads) { - if (pos >= read_head.location && pos < read_head.GetEnd()) { - return &read_head; - } - } - return nullptr; - } - - // Prefetch all read heads - void Prefetch() { - for (auto &read_head : read_heads) { - read_head.Allocate(allocator); - - if (read_head.GetEnd() > handle.GetFileSize()) { - throw std::runtime_error("Prefetch registered requested for bytes outside file"); - } - - handle.Read(read_head.data->get(), read_head.size, read_head.location); - read_head.data_isset = true; - } - } -}; - -class ThriftFileTransport : public duckdb_apache::thrift::transport::TVirtualTransport { -public: - static constexpr uint64_t PREFETCH_FALLBACK_BUFFERSIZE = 1000000; - - ThriftFileTransport(Allocator &allocator, FileHandle &handle_p, FileOpener &opener, bool prefetch_mode_p) - : handle(handle_p), location(0), allocator(allocator), ra_buffer(ReadAheadBuffer(allocator, handle_p, opener)), - prefetch_mode(prefetch_mode_p) { - } - - uint32_t read(uint8_t *buf, uint32_t len) { - auto prefetch_buffer = ra_buffer.GetReadHead(location); - if (prefetch_buffer != nullptr && location - prefetch_buffer->location + len <= prefetch_buffer->size) { - D_ASSERT(location - prefetch_buffer->location + len <= prefetch_buffer->size); - - if (!prefetch_buffer->data_isset) { - prefetch_buffer->Allocate(allocator); - handle.Read(prefetch_buffer->data->get(), prefetch_buffer->size, prefetch_buffer->location); - prefetch_buffer->data_isset = true; - } - memcpy(buf, prefetch_buffer->data->get() + location - prefetch_buffer->location, len); - } else { - if (prefetch_mode && len < PREFETCH_FALLBACK_BUFFERSIZE && len > 0) { - Prefetch(location, MinValue(PREFETCH_FALLBACK_BUFFERSIZE, handle.GetFileSize() - location)); - auto prefetch_buffer_fallback = ra_buffer.GetReadHead(location); - D_ASSERT(location - prefetch_buffer_fallback->location + len <= prefetch_buffer_fallback->size); - memcpy(buf, prefetch_buffer_fallback->data->get() + location - prefetch_buffer_fallback->location, len); - } else { - handle.Read(buf, len, location); - } - } - location += len; - return len; - } - - // Prefetch a single buffer - void Prefetch(idx_t pos, uint64_t len) { - RegisterPrefetch(pos, len, false); - FinalizeRegistration(); - PrefetchRegistered(); - } - - // Register a buffer for prefixing - void RegisterPrefetch(idx_t pos, uint64_t len, bool can_merge = true) { - ra_buffer.AddReadHead(pos, len, can_merge); - } - - // Prevents any further merges, should be called before PrefetchRegistered - void FinalizeRegistration() { - ra_buffer.merge_set.clear(); - } - - // Prefetch all previously registered ranges - void PrefetchRegistered() { - ra_buffer.Prefetch(); - } - - void ClearPrefetch() { - ra_buffer.read_heads.clear(); - ra_buffer.merge_set.clear(); - } - - void SetLocation(idx_t location_p) { - location = location_p; - } - - idx_t GetLocation() { - return location; - } - idx_t GetSize() { - return handle.file_system.GetFileSize(handle); - } - -private: - FileHandle &handle; - idx_t location; - - Allocator &allocator; - - // Multi-buffer prefetch - ReadAheadBuffer ra_buffer; - - // Whether the prefetch mode is enabled. In this mode the DirectIO flag of the handle will be set and the parquet - // reader will manage the read buffering. - bool prefetch_mode; -}; - -} // namespace duckdb - -//===----------------------------------------------------------------------===// -// DuckDB -// -// resizable_buffer.hpp -// -// -//===----------------------------------------------------------------------===// - - -#include "duckdb.hpp" -#ifndef DUCKDB_AMALGAMATION -#include "duckdb/common/allocator.hpp" -#endif - -#include - -namespace duckdb { - -class ByteBuffer { // on to the 10 thousandth impl -public: - ByteBuffer() {}; - ByteBuffer(char *ptr, uint64_t len) : ptr(ptr), len(len) {}; - - char *ptr = nullptr; - uint64_t len = 0; - -public: - void inc(uint64_t increment) { - available(increment); - len -= increment; - ptr += increment; - } - - template - T read() { - T val = get(); - inc(sizeof(T)); - return val; - } - - template - T get() { - available(sizeof(T)); - T val = Load((data_ptr_t)ptr); - return val; - } - - void copy_to(char *dest, uint64_t len) { - available(len); - std::memcpy(dest, ptr, len); - } - - void zero() { - std::memset(ptr, 0, len); - } - - void available(uint64_t req_len) { - if (req_len > len) { - throw std::runtime_error("Out of buffer"); - } - } -}; - -class ResizeableBuffer : public ByteBuffer { -public: - ResizeableBuffer() { - } - ResizeableBuffer(Allocator &allocator, uint64_t new_size) { - resize(allocator, new_size); - } - void resize(Allocator &allocator, uint64_t new_size) { - len = new_size; - if (new_size == 0) { - return; - } - if (new_size > alloc_len) { - alloc_len = new_size; - allocated_data = allocator.Allocate(alloc_len); - ptr = (char *)allocated_data->get(); - } - } - -private: - unique_ptr allocated_data; - idx_t alloc_len = 0; -}; - -} // namespace duckdb - - -//===----------------------------------------------------------------------===// -// DuckDB -// -// parquet_rle_bp_decoder.hpp -// -// -//===----------------------------------------------------------------------===// - - - - - - - - - -namespace duckdb { -class ParquetDecodeUtils { - -public: - template - static T ZigzagToInt(const T n) { - return (n >> 1) ^ -(n & 1); - } - - static const uint32_t BITPACK_MASKS[]; - static const uint8_t BITPACK_DLEN; - - template - static uint32_t BitUnpack(ByteBuffer &buffer, uint8_t &bitpack_pos, T *dest, uint32_t count, uint8_t width) { - auto mask = BITPACK_MASKS[width]; - - for (uint32_t i = 0; i < count; i++) { - T val = (buffer.get() >> bitpack_pos) & mask; - bitpack_pos += width; - while (bitpack_pos > BITPACK_DLEN) { - buffer.inc(1); - val |= (buffer.get() << (BITPACK_DLEN - (bitpack_pos - width))) & mask; - bitpack_pos -= BITPACK_DLEN; - } - dest[i] = val; - } - return count; - } - - template - static T VarintDecode(ByteBuffer &buf) { - T result = 0; - uint8_t shift = 0; - while (true) { - auto byte = buf.read(); - result |= (byte & 127) << shift; - if ((byte & 128) == 0) - break; - shift += 7; - if (shift > sizeof(T) * 8) { - throw std::runtime_error("Varint-decoding found too large number"); - } - } - return result; - } -}; -} // namespace duckdb - - -namespace duckdb { - -class RleBpDecoder { -public: - /// Create a decoder object. buffer/buffer_len is the decoded data. - /// bit_width is the width of each value (before encoding). - RleBpDecoder(const uint8_t *buffer, uint32_t buffer_len, uint32_t bit_width) - : buffer_((char *)buffer, buffer_len), bit_width_(bit_width), current_value_(0), repeat_count_(0), - literal_count_(0) { - if (bit_width >= 64) { - throw std::runtime_error("Decode bit width too large"); - } - byte_encoded_len = ((bit_width_ + 7) / 8); - max_val = (uint64_t(1) << bit_width_) - 1; - } - - template - void GetBatch(char *values_target_ptr, uint32_t batch_size) { - auto values = (T *)values_target_ptr; - uint32_t values_read = 0; - - while (values_read < batch_size) { - if (repeat_count_ > 0) { - int repeat_batch = MinValue(batch_size - values_read, static_cast(repeat_count_)); - std::fill(values + values_read, values + values_read + repeat_batch, static_cast(current_value_)); - repeat_count_ -= repeat_batch; - values_read += repeat_batch; - } else if (literal_count_ > 0) { - uint32_t literal_batch = MinValue(batch_size - values_read, static_cast(literal_count_)); - uint32_t actual_read = ParquetDecodeUtils::BitUnpack(buffer_, bitpack_pos, values + values_read, - literal_batch, bit_width_); - if (literal_batch != actual_read) { - throw std::runtime_error("Did not find enough values"); - } - literal_count_ -= literal_batch; - values_read += literal_batch; - } else { - if (!NextCounts()) { - if (values_read != batch_size) { - throw std::runtime_error("RLE decode did not find enough values"); - } - return; - } - } - } - if (values_read != batch_size) { - throw std::runtime_error("RLE decode did not find enough values"); - } - } - - static uint8_t ComputeBitWidth(idx_t val) { - if (val == 0) { - return 0; - } - uint8_t ret = 1; - while (((idx_t)(1 << ret) - 1) < val) { - ret++; - } - return ret; - } - -private: - ByteBuffer buffer_; - - /// Number of bits needed to encode the value. Must be between 0 and 64. - uint32_t bit_width_; - uint64_t current_value_; - uint32_t repeat_count_; - uint32_t literal_count_; - uint8_t byte_encoded_len; - uint64_t max_val; - - uint8_t bitpack_pos = 0; - - /// Fills literal_count_ and repeat_count_ with next values. Returns false if there - /// are no more. - template - bool NextCounts() { - // Read the next run's indicator int, it could be a literal or repeated run. - // The int is encoded as a vlq-encoded value. - if (bitpack_pos != 0) { - buffer_.inc(1); - bitpack_pos = 0; - } - auto indicator_value = ParquetDecodeUtils::VarintDecode(buffer_); - - // lsb indicates if it is a literal run or repeated run - bool is_literal = indicator_value & 1; - if (is_literal) { - literal_count_ = (indicator_value >> 1) * 8; - } else { - repeat_count_ = indicator_value >> 1; - // (ARROW-4018) this is not big-endian compatible, lol - current_value_ = 0; - for (auto i = 0; i < byte_encoded_len; i++) { - current_value_ |= (buffer_.read() << (i * 8)); - } - // sanity check - if (repeat_count_ > 0 && current_value_ > max_val) { - throw std::runtime_error("Payload value bigger than allowed. Corrupted file?"); - } - } - // TODO complain if we run out of buffer - return true; - } -}; -} // namespace duckdb - - - - -namespace duckdb { -class DbpDecoder { -public: - DbpDecoder(const uint8_t *buffer, uint32_t buffer_len) : buffer_((char *)buffer, buffer_len) { - - // - // overall header - block_value_count = ParquetDecodeUtils::VarintDecode(buffer_); - miniblocks_per_block = ParquetDecodeUtils::VarintDecode(buffer_); - total_value_count = ParquetDecodeUtils::VarintDecode(buffer_); - start_value = ParquetDecodeUtils::ZigzagToInt(ParquetDecodeUtils::VarintDecode(buffer_)); - - // some derivatives - values_per_miniblock = block_value_count / miniblocks_per_block; - miniblock_bit_widths = std::unique_ptr(new data_t[miniblocks_per_block]); - - // init state to something sane - values_left_in_block = 0; - values_left_in_miniblock = 0; - miniblock_offset = 0; - min_delta = 0; - bitpack_pos = 0; - is_first_value = true; - }; - - ByteBuffer BufferPtr() { - if (bitpack_pos != 0) { - buffer_.inc(1); - bitpack_pos = 0; - } - return buffer_; - } - - template - void GetBatch(char *values_target_ptr, uint32_t batch_size) { - auto values = (T *)values_target_ptr; - - if (batch_size == 0) { - return; - } - idx_t value_offset = 0; - - if (is_first_value) { - values[0] = start_value; - value_offset++; - is_first_value = false; - } - - if (total_value_count == 1) { // I guess it's a special case - if (batch_size > 1) { - throw std::runtime_error("DBP decode did not find enough values (have 1)"); - } - return; - } - - while (value_offset < batch_size) { - if (values_left_in_block == 0) { // need to open new block - if (bitpack_pos > 0) { // have to eat the leftovers if any - buffer_.inc(1); - } - min_delta = ParquetDecodeUtils::ZigzagToInt(ParquetDecodeUtils::VarintDecode(buffer_)); - for (idx_t miniblock_idx = 0; miniblock_idx < miniblocks_per_block; miniblock_idx++) { - miniblock_bit_widths[miniblock_idx] = buffer_.read(); - // TODO what happens if width is 0? - } - values_left_in_block = block_value_count; - miniblock_offset = 0; - bitpack_pos = 0; - values_left_in_miniblock = values_per_miniblock; - } - if (values_left_in_miniblock == 0) { - miniblock_offset++; - values_left_in_miniblock = values_per_miniblock; - } - - auto read_now = MinValue(values_left_in_miniblock, (idx_t)batch_size - value_offset); - ParquetDecodeUtils::BitUnpack(buffer_, bitpack_pos, &values[value_offset], read_now, - miniblock_bit_widths[miniblock_offset]); - for (idx_t i = value_offset; i < value_offset + read_now; i++) { - values[i] = ((i == 0) ? start_value : values[i - 1]) + min_delta + values[i]; - } - value_offset += read_now; - values_left_in_miniblock -= read_now; - values_left_in_block -= read_now; - } - - if (value_offset != batch_size) { - throw std::runtime_error("DBP decode did not find enough values"); - } - start_value = values[batch_size - 1]; - } - -private: - ByteBuffer buffer_; - idx_t block_value_count; - idx_t miniblocks_per_block; - idx_t total_value_count; - int64_t start_value; - idx_t values_per_miniblock; - - std::unique_ptr miniblock_bit_widths; - idx_t values_left_in_block; - idx_t values_left_in_miniblock; - idx_t miniblock_offset; - int64_t min_delta; - - bool is_first_value; - - uint8_t bitpack_pos; -}; -} // namespace duckdb - - - -#include "duckdb.hpp" -#ifndef DUCKDB_AMALGAMATION -#include "duckdb/storage/statistics/base_statistics.hpp" -#endif - - -namespace duckdb { - -using duckdb_parquet::format::ColumnChunk; -using duckdb_parquet::format::SchemaElement; - -struct LogicalType; - -struct ParquetStatisticsUtils { - - static unique_ptr TransformColumnStatistics(const SchemaElement &s_ele, const LogicalType &type, - const ColumnChunk &column_chunk); - - static Value ConvertValue(const LogicalType &type, const duckdb_parquet::format::SchemaElement &schema_ele, - const std::string &stats); -}; - -} // namespace duckdb - - -#include "duckdb.hpp" -#ifndef DUCKDB_AMALGAMATION -#include "duckdb/storage/statistics/string_statistics.hpp" -#include "duckdb/storage/statistics/numeric_statistics.hpp" -#include "duckdb/common/types/vector.hpp" -#include "duckdb/common/types/string_type.hpp" -#include "duckdb/common/types/chunk_collection.hpp" -#include "duckdb/common/operator/cast_operators.hpp" -#include "duckdb/common/types/vector_cache.hpp" -#endif - -namespace duckdb { -class ParquetReader; - -using duckdb_apache::thrift::protocol::TProtocol; - -using duckdb_parquet::format::ColumnChunk; -using duckdb_parquet::format::FieldRepetitionType; -using duckdb_parquet::format::PageHeader; -using duckdb_parquet::format::SchemaElement; -using duckdb_parquet::format::Type; - -typedef std::bitset parquet_filter_t; - -class ColumnReader { -public: - ColumnReader(ParquetReader &reader, LogicalType type_p, const SchemaElement &schema_p, idx_t file_idx_p, - idx_t max_define_p, idx_t max_repeat_p); - virtual ~ColumnReader(); - -public: - static unique_ptr CreateReader(ParquetReader &reader, const LogicalType &type_p, - const SchemaElement &schema_p, idx_t schema_idx_p, idx_t max_define, - idx_t max_repeat); - virtual void InitializeRead(const std::vector &columns, TProtocol &protocol_p); - virtual idx_t Read(uint64_t num_values, parquet_filter_t &filter, uint8_t *define_out, uint8_t *repeat_out, - Vector &result_out); - - virtual void Skip(idx_t num_values); - - ParquetReader &Reader(); - const LogicalType &Type() const; - const SchemaElement &Schema() const; - idx_t FileIdx() const; - idx_t MaxDefine() const; - idx_t MaxRepeat() const; - - virtual idx_t FileOffset() const; - virtual uint64_t TotalCompressedSize(); - virtual idx_t GroupRowsAvailable(); - - // register the range this reader will touch for prefetching - virtual void RegisterPrefetch(ThriftFileTransport &transport, bool allow_merge); - - virtual unique_ptr Stats(const std::vector &columns); - -protected: - // readers that use the default Read() need to implement those - virtual void Plain(shared_ptr plain_data, uint8_t *defines, idx_t num_values, parquet_filter_t &filter, - idx_t result_offset, Vector &result); - virtual void Dictionary(shared_ptr dictionary_data, idx_t num_entries); - virtual void Offsets(uint32_t *offsets, uint8_t *defines, idx_t num_values, parquet_filter_t &filter, - idx_t result_offset, Vector &result); - - // these are nops for most types, but not for strings - virtual void DictReference(Vector &result); - virtual void PlainReference(shared_ptr, Vector &result); - - // applies any skips that were registered using Skip() - virtual void ApplyPendingSkips(idx_t num_values); - - bool HasDefines() { - return max_define > 0; - } - - bool HasRepeats() { - return max_repeat > 0; - } - -protected: - const SchemaElement &schema; - - idx_t file_idx; - idx_t max_define; - idx_t max_repeat; - - ParquetReader &reader; - LogicalType type; - - idx_t pending_skips = 0; - -private: - void PrepareRead(parquet_filter_t &filter); - void PreparePage(idx_t compressed_page_size, idx_t uncompressed_page_size); - void PrepareDataPage(PageHeader &page_hdr); - void PreparePageV2(PageHeader &page_hdr); - - const duckdb_parquet::format::ColumnChunk *chunk = nullptr; - - duckdb_apache::thrift::protocol::TProtocol *protocol; - idx_t page_rows_available; - idx_t group_rows_available; - idx_t chunk_read_offset; - - shared_ptr block; - - ResizeableBuffer offset_buffer; - - unique_ptr dict_decoder; - unique_ptr defined_decoder; - unique_ptr repeated_decoder; - unique_ptr dbp_decoder; - - // dummies for Skip() - parquet_filter_t none_filter; - ResizeableBuffer dummy_define; - ResizeableBuffer dummy_repeat; -}; - -} // namespace duckdb - -//===----------------------------------------------------------------------===// -// DuckDB -// -// parquet_file_metadata_cache.hpp -// -// -//===----------------------------------------------------------------------===// - - -#include "duckdb.hpp" -#ifndef DUCKDB_AMALGAMATION -#include "duckdb/storage/object_cache.hpp" -#endif - - -namespace duckdb { - -//! ParquetFileMetadataCache -class ParquetFileMetadataCache : public ObjectCacheEntry { -public: - ParquetFileMetadataCache() : metadata(nullptr) { - } - ParquetFileMetadataCache(std::unique_ptr file_metadata, time_t r_time) - : metadata(std::move(file_metadata)), read_time(r_time) { - } - - ~ParquetFileMetadataCache() override = default; - - //! Parquet file metadata - std::unique_ptr metadata; - - //! read time - time_t read_time; - -public: - static string ObjectType() { - return "parquet_metadata"; - } - - string GetObjectType() override { - return ObjectType(); - } -}; -} // namespace duckdb - - - - - -#include - -namespace duckdb_parquet { -namespace format { -class FileMetaData; -} -} // namespace duckdb_parquet - -namespace duckdb { -class Allocator; -class ClientContext; -class ChunkCollection; -class BaseStatistics; -class TableFilterSet; - -struct ParquetReaderPrefetchConfig { - // Percentage of data in a row group span that should be scanned for enabling whole group prefetch - static constexpr double WHOLE_GROUP_PREFETCH_MINIMUM_SCAN = 0.95; -}; - -struct ParquetReaderScanState { - vector group_idx_list; - int64_t current_group; - vector column_ids; - idx_t group_offset; - unique_ptr file_handle; - unique_ptr root_reader; - unique_ptr thrift_file_proto; - - bool finished; - TableFilterSet *filters; - SelectionVector sel; - - ResizeableBuffer define_buf; - ResizeableBuffer repeat_buf; - - bool prefetch_mode = false; - bool current_group_prefetched = false; -}; - -struct ParquetOptions { - explicit ParquetOptions() { - } - explicit ParquetOptions(ClientContext &context); - - bool binary_as_string = false; -}; - -class ParquetReader { -public: - ParquetReader(Allocator &allocator, unique_ptr file_handle_p, - const vector &expected_types_p, const string &initial_filename_p = string()); - ParquetReader(Allocator &allocator, unique_ptr file_handle_p) - : ParquetReader(allocator, std::move(file_handle_p), vector(), string()) { - } - - ParquetReader(ClientContext &context, string file_name, const vector &names, - const vector &expected_types_p, const vector &column_ids, - ParquetOptions parquet_options, const string &initial_filename = string()); - ParquetReader(ClientContext &context, string file_name, ParquetOptions parquet_options) - : ParquetReader(context, std::move(file_name), vector(), vector(), vector(), - parquet_options, string()) { - } - ParquetReader(ClientContext &context, string file_name, const vector &expected_types_p, - ParquetOptions parquet_options) - : ParquetReader(context, std::move(file_name), vector(), expected_types_p, vector(), - parquet_options, string()) { - } - ~ParquetReader(); - - Allocator &allocator; - string file_name; - FileOpener *file_opener; - vector return_types; - vector names; - shared_ptr metadata; - ParquetOptions parquet_options; - -public: - void InitializeScan(ParquetReaderScanState &state, vector column_ids, vector groups_to_read, - TableFilterSet *table_filters); - void Scan(ParquetReaderScanState &state, DataChunk &output); - - idx_t NumRows(); - idx_t NumRowGroups(); - - const duckdb_parquet::format::FileMetaData *GetFileMetadata(); - - static unique_ptr ReadStatistics(ParquetReader &reader, LogicalType &type, column_t column_index, - const duckdb_parquet::format::FileMetaData *file_meta_data); - static LogicalType DeriveLogicalType(const SchemaElement &s_ele, bool binary_as_string); - -private: - void InitializeSchema(const vector &names, const vector &expected_types_p, - const vector &column_ids, const string &initial_filename_p); - bool ScanInternal(ParquetReaderScanState &state, DataChunk &output); - unique_ptr CreateReader(const duckdb_parquet::format::FileMetaData *file_meta_data); - - unique_ptr CreateReaderRecursive(const duckdb_parquet::format::FileMetaData *file_meta_data, - idx_t depth, idx_t max_define, idx_t max_repeat, - idx_t &next_schema_idx, idx_t &next_file_idx); - const duckdb_parquet::format::RowGroup &GetGroup(ParquetReaderScanState &state); - uint64_t GetGroupCompressedSize(ParquetReaderScanState &state); - idx_t GetGroupOffset(ParquetReaderScanState &state); - // Group span is the distance between the min page offset and the max page offset plus the max page compressed size - uint64_t GetGroupSpan(ParquetReaderScanState &state); - void PrepareRowGroupBuffer(ParquetReaderScanState &state, idx_t out_col_idx); - LogicalType DeriveLogicalType(const SchemaElement &s_ele); - - template - std::runtime_error FormatException(const string fmt_str, Args... params) { - return std::runtime_error("Failed to read Parquet file \"" + file_name + - "\": " + StringUtil::Format(fmt_str, params...)); - } - -private: - unique_ptr file_handle; - //! column-id map, used when reading multiple parquet files since separate parquet files might have columns at - //! different positions e.g. the first file might have column "a" at position 0, the second at position 1, etc - vector column_id_map; - //! Map of column_id -> cast, used when reading multiple parquet files when parquet files have diverging types - //! for the same column - unordered_map cast_map; -}; - -} // namespace duckdb -//===----------------------------------------------------------------------===// -// DuckDB -// -// parquet_writer.hpp -// -// -//===----------------------------------------------------------------------===// - - - -#include "duckdb.hpp" -#ifndef DUCKDB_AMALGAMATION -#include "duckdb/common/common.hpp" -#include "duckdb/common/exception.hpp" -#include "duckdb/common/mutex.hpp" -#include "duckdb/common/serializer/buffered_file_writer.hpp" -#include "duckdb/common/types/chunk_collection.hpp" -#endif - - -//===----------------------------------------------------------------------===// -// DuckDB -// -// column_writer.hpp -// -// -//===----------------------------------------------------------------------===// - - - -#include "duckdb.hpp" - - -namespace duckdb { -class BufferedSerializer; -class ParquetWriter; -class ColumnWriterPageState; -class StandardColumnWriterState; - -class ColumnWriterState { -public: - virtual ~ColumnWriterState(); - - vector definition_levels; - vector repetition_levels; - vector is_empty; -}; - -class ColumnWriterStatistics { -public: - virtual ~ColumnWriterStatistics(); - - virtual string GetMin(); - virtual string GetMax(); - virtual string GetMinValue(); - virtual string GetMaxValue(); -}; - -class ColumnWriter { - //! We limit the uncompressed page size to 100MB - // The max size in Parquet is 2GB, but we choose a more conservative limit - static constexpr const idx_t MAX_UNCOMPRESSED_PAGE_SIZE = 100000000; - -public: - ColumnWriter(ParquetWriter &writer, idx_t schema_idx, vector schema_path, idx_t max_repeat, - idx_t max_define, bool can_have_nulls); - virtual ~ColumnWriter(); - - ParquetWriter &writer; - idx_t schema_idx; - vector schema_path; - idx_t max_repeat; - idx_t max_define; - bool can_have_nulls; - // collected stats - idx_t null_count; - -public: - //! Create the column writer for a specific type recursively - static unique_ptr CreateWriterRecursive(vector &schemas, - ParquetWriter &writer, const LogicalType &type, - const string &name, vector schema_path, - idx_t max_repeat = 0, idx_t max_define = 1, - bool can_have_nulls = true); - - virtual unique_ptr InitializeWriteState(duckdb_parquet::format::RowGroup &row_group); - virtual void Prepare(ColumnWriterState &state, ColumnWriterState *parent, Vector &vector, idx_t count); - - virtual void BeginWrite(ColumnWriterState &state); - virtual void Write(ColumnWriterState &state, Vector &vector, idx_t count); - virtual void FinalizeWrite(ColumnWriterState &state); - -protected: - void HandleDefineLevels(ColumnWriterState &state, ColumnWriterState *parent, ValidityMask &validity, idx_t count, - uint16_t define_value, uint16_t null_value); - void HandleRepeatLevels(ColumnWriterState &state_p, ColumnWriterState *parent, idx_t count, idx_t max_repeat); - - void WriteLevels(Serializer &temp_writer, const vector &levels, idx_t max_value, idx_t start_offset, - idx_t count); - - virtual duckdb_parquet::format::Encoding::type GetEncoding(); - - void NextPage(ColumnWriterState &state_p); - void FlushPage(ColumnWriterState &state_p); - void WriteDictionary(ColumnWriterState &state_p, unique_ptr temp_writer, idx_t row_count); - - virtual void FlushDictionary(ColumnWriterState &state, ColumnWriterStatistics *stats); - - //! Initializes the state used to track statistics during writing. Only used for scalar types. - virtual unique_ptr InitializeStatsState(); - //! Retrieves the row size of a vector at the specified location. Only used for scalar types. - virtual idx_t GetRowSize(Vector &vector, idx_t index); - //! Writes a (subset of a) vector to the specified serializer. Only used for scalar types. - virtual void WriteVector(Serializer &temp_writer, ColumnWriterStatistics *stats, ColumnWriterPageState *page_state, - Vector &vector, idx_t chunk_start, idx_t chunk_end); - - //! Initialize the writer for a specific page. Only used for scalar types. - virtual unique_ptr InitializePageState(); - //! Flushes the writer for a specific page. Only used for scalar types. - virtual void FlushPageState(Serializer &temp_writer, ColumnWriterPageState *state); - - void CompressPage(BufferedSerializer &temp_writer, size_t &compressed_size, data_ptr_t &compressed_data, - unique_ptr &compressed_buf); - - void SetParquetStatistics(StandardColumnWriterState &state, duckdb_parquet::format::ColumnChunk &column); -}; - -} // namespace duckdb - - - -namespace duckdb { -class FileSystem; -class FileOpener; - -class ParquetWriter { - friend class ColumnWriter; - friend class ListColumnWriter; - friend class StructColumnWriter; - -public: - ParquetWriter(FileSystem &fs, string file_name, FileOpener *file_opener, vector types, - vector names, duckdb_parquet::format::CompressionCodec::type codec); - -public: - void Flush(ChunkCollection &buffer); - void Finalize(); - - static duckdb_parquet::format::Type::type DuckDBTypeToParquetType(const LogicalType &duckdb_type); - static void SetSchemaProperties(const LogicalType &duckdb_type, duckdb_parquet::format::SchemaElement &schema_ele); - -private: - string file_name; - vector sql_types; - vector column_names; - duckdb_parquet::format::CompressionCodec::type codec; - - unique_ptr writer; - shared_ptr protocol; - duckdb_parquet::format::FileMetaData file_meta_data; - std::mutex lock; - - vector> column_writers; -}; - -} // namespace duckdb diff --git a/velox/external/duckdb/parquet-extension.hpp b/velox/external/duckdb/parquet-extension.hpp deleted file mode 100644 index 42680b0d47bd..000000000000 --- a/velox/external/duckdb/parquet-extension.hpp +++ /dev/null @@ -1,21 +0,0 @@ -//===----------------------------------------------------------------------===// -// DuckDB -// -// parquet-extension.hpp -// -// -//===----------------------------------------------------------------------===// - -#pragma once - -#include "duckdb.hpp" - -namespace duckdb { - - class ParquetExtension : public Extension { - public: - void Load(DuckDB &db) override; - std::string Name() override; - }; - -} // namespace duckdb