-
Notifications
You must be signed in to change notification settings - Fork 90
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Column selection / Filtering / Projections #15
Merged
Merged
Changes from 4 commits
Commits
Show all changes
5 commits
Select commit
Hold shift + click to select a range
24c6c40
Column query / thread-safe detoasting
mkaruza 28e46d3
Tuple filtering on page level
mkaruza ebe5014
Query COUNT(*)
mkaruza 5b66cd7
Fixed issue with column filtering and projection
mkaruza a6cb22d
Add FIXME reminder for duckdb_malloc/duckdb_free calls
mkaruza File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
#pragma once | ||
|
||
#include "duckdb.hpp" | ||
|
||
extern "C" { | ||
#include "postgres.h" | ||
} | ||
|
||
#include <mutex> | ||
|
||
namespace quack { | ||
|
||
// Returns `value` as a plain (non-toasted, uncompressed, 4-byte header) varlena wrapped in a Datum. | ||
// `lock` is held while the toast relation is read for on-disk external values. | ||
// `*shouldFree` is set to true when the result was allocated with duckdb_malloc and must be released by the | ||
// caller with duckdb_free, and to false when the returned Datum is simply the input pointer. | ||
Datum DetoastPostgresDatum(struct varlena *value, std::mutex &lock, bool *shouldFree); | ||
|
||
} // namespace quack |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
#pragma once | ||
|
||
#include "duckdb.hpp" | ||
|
||
extern "C" { | ||
#include "postgres.h" | ||
} | ||
|
||
namespace quack { | ||
// NOTE(review): the definition is not visible here; presumably returns whether the Postgres `value` (of type | ||
// `typeOid`, possibly NULL per `isNull`) satisfies the DuckDB table filter -- confirm against the implementation. | ||
bool ApplyValueFilter(duckdb::TableFilter &filter, Datum &value, bool isNull, Oid typeOid); | ||
} // namespace quack |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,7 +2,6 @@ | |
|
||
extern "C" { | ||
#include "postgres.h" | ||
|
||
#include "executor/executor.h" | ||
} | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,165 @@ | ||
#include "duckdb.hpp" | ||
|
||
extern "C" { | ||
#include "postgres.h" | ||
#include "pg_config.h" | ||
#include "varatt.h" | ||
|
||
#ifdef USE_LZ4 | ||
#include <lz4.h> | ||
#endif | ||
|
||
#include "access/detoast.h" | ||
#include "access/table.h" | ||
#include "access/tableam.h" | ||
#include "access/toast_internals.h" | ||
#include "common/pg_lzcompress.h" | ||
#include "utils/expandeddatum.h" | ||
} | ||
|
||
#include "quack/quack_types.hpp" | ||
#include "quack/quack_detoast.hpp" | ||
|
||
/* | ||
* The following functions mirror the detoasting logic found in the Postgres source, but for DuckDB execution they | ||
* need to be thread-safe. Functions such as palloc/pfree are replaced with duckdb_malloc/duckdb_free, and access to | ||
* the toast table is protected with a lock, also for thread-safety reasons. This is an initial implementation and | ||
* should be revisited in the future for better performance. | ||
*/ | ||
|
||
namespace quack { | ||
|
||
struct varlena * | ||
_pglz_decompress_datum(const struct varlena *value) { | ||
struct varlena *result; | ||
int32 rawsize; | ||
|
||
result = (struct varlena *)duckdb_malloc(VARDATA_COMPRESSED_GET_EXTSIZE(value) + VARHDRSZ); | ||
|
||
rawsize = pglz_decompress((char *)value + VARHDRSZ_COMPRESSED, VARSIZE(value) - VARHDRSZ_COMPRESSED, | ||
VARDATA(result), VARDATA_COMPRESSED_GET_EXTSIZE(value), true); | ||
if (rawsize < 0) | ||
ereport(ERROR, (errcode(ERRCODE_DATA_CORRUPTED), errmsg_internal("compressed pglz data is corrupt"))); | ||
|
||
SET_VARSIZE(result, rawsize + VARHDRSZ); | ||
|
||
return result; | ||
} | ||
|
||
struct varlena * | ||
_lz4_decompress_datum(const struct varlena *value) { | ||
#ifndef USE_LZ4 | ||
return NULL; /* keep compiler quiet */ | ||
#else | ||
int32 rawsize; | ||
struct varlena *result; | ||
|
||
result = (struct varlena *)duckdb_malloc(VARDATA_COMPRESSED_GET_EXTSIZE(value) + VARHDRSZ); | ||
|
||
rawsize = LZ4_decompress_safe((char *)value + VARHDRSZ_COMPRESSED, VARDATA(result), | ||
VARSIZE(value) - VARHDRSZ_COMPRESSED, VARDATA_COMPRESSED_GET_EXTSIZE(value)); | ||
if (rawsize < 0) | ||
ereport(ERROR, (errcode(ERRCODE_DATA_CORRUPTED), errmsg_internal("compressed lz4 data is corrupt"))); | ||
|
||
SET_VARSIZE(result, rawsize + VARHDRSZ); | ||
|
||
return result; | ||
#endif | ||
} | ||
|
||
static struct varlena * | ||
_toast_decompress_datum(struct varlena *attr) { | ||
ToastCompressionId cmid; | ||
cmid = (ToastCompressionId)TOAST_COMPRESS_METHOD(attr); | ||
switch (cmid) { | ||
case TOAST_PGLZ_COMPRESSION_ID: | ||
return _pglz_decompress_datum(attr); | ||
case TOAST_LZ4_COMPRESSION_ID: | ||
return _lz4_decompress_datum(attr); | ||
default: | ||
elog(ERROR, "invalid compression method id %d", TOAST_COMPRESS_METHOD(attr)); | ||
return NULL; /* keep compiler quiet */ | ||
} | ||
} | ||
|
||
static struct varlena * | ||
_toast_fetch_datum(struct varlena *attr, std::mutex &lock) { | ||
Relation toastrel; | ||
struct varlena *result; | ||
struct varatt_external toast_pointer; | ||
int32 attrsize; | ||
|
||
if (!VARATT_IS_EXTERNAL_ONDISK(attr)) | ||
elog(ERROR, "toast_fetch_datum shouldn't be called for non-ondisk datums"); | ||
|
||
/* Must copy to access aligned fields */ | ||
VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr); | ||
|
||
attrsize = VARATT_EXTERNAL_GET_EXTSIZE(toast_pointer); | ||
|
||
result = (struct varlena *)duckdb_malloc(attrsize + VARHDRSZ); | ||
|
||
if (VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer)) { | ||
SET_VARSIZE_COMPRESSED(result, attrsize + VARHDRSZ); | ||
} else { | ||
SET_VARSIZE(result, attrsize + VARHDRSZ); | ||
} | ||
|
||
if (attrsize == 0) | ||
return result; | ||
|
||
lock.lock(); | ||
toastrel = table_open(toast_pointer.va_toastrelid, AccessShareLock); | ||
table_relation_fetch_toast_slice(toastrel, toast_pointer.va_valueid, attrsize, 0, attrsize, result); | ||
table_close(toastrel, AccessShareLock); | ||
lock.unlock(); | ||
|
||
return result; | ||
} | ||
|
||
/*
 * Convert a possibly toasted / compressed / short-header varlena into a plain varlena with a 4-byte header.
 *
 * attr:       value to detoast.
 * lock:       mutex held while the toast relation is read for on-disk external values.
 * shouldFree: out-parameter. Set to true when the returned value was allocated with duckdb_malloc and must be
 *             released by the caller with duckdb_free; set to false when the returned Datum is `attr` itself.
 *
 * Returns the detoasted value as a pointer Datum.
 */
Datum
DetoastPostgresDatum(struct varlena *attr, std::mutex &lock, bool *shouldFree) {
	struct varlena *toastedValue = nullptr;
	*shouldFree = true;
	if (VARATT_IS_EXTERNAL_ONDISK(attr)) {
		/* Stored out of line in the toast relation; the fetched value may additionally be compressed. */
		toastedValue = _toast_fetch_datum(attr, lock);
		if (VARATT_IS_COMPRESSED(toastedValue)) {
			struct varlena *tmp = toastedValue;
			toastedValue = _toast_decompress_datum(tmp);
			duckdb_free(tmp);
		}
	} else if (VARATT_IS_EXTERNAL_INDIRECT(attr)) {
		/* Indirect pointer: follow it to the referenced in-memory varlena. */
		struct varatt_indirect redirect;
		VARATT_EXTERNAL_GET_POINTER(redirect, attr);
		struct varlena *target = (struct varlena *)redirect.pointer;

		/* Recurse on the referenced value (not on `attr`) in case it is still extended in some other way. */
		toastedValue = reinterpret_cast<struct varlena *>(DetoastPostgresDatum(target, lock, shouldFree));

		/* If the target was already plain the recursion returned it unchanged; hand out a private copy. */
		if (toastedValue == target) {
			const Size targetSize = VARSIZE_ANY(target);
			struct varlena *copy = (struct varlena *)duckdb_malloc(targetSize);
			memcpy(copy, target, targetSize);
			toastedValue = copy;
			*shouldFree = true;
		}
	} else if (VARATT_IS_EXTERNAL_EXPANDED(attr)) {
		/* Expanded object: flatten it into a newly allocated buffer. */
		ExpandedObjectHeader *eoh = DatumGetEOHP(PointerGetDatum(attr));
		Size resultsize = EOH_get_flat_size(eoh);
		toastedValue = (struct varlena *)duckdb_malloc(resultsize);
		EOH_flatten_into(eoh, (void *)toastedValue, resultsize);
	} else if (VARATT_IS_COMPRESSED(attr)) {
		/* Compressed in line. */
		toastedValue = _toast_decompress_datum(attr);
	} else if (VARATT_IS_SHORT(attr)) {
		/* Short (1-byte) header: re-pack the payload behind a regular 4-byte header. */
		Size data_size = VARSIZE_SHORT(attr) - VARHDRSZ_SHORT;
		Size new_size = data_size + VARHDRSZ;
		toastedValue = (struct varlena *)duckdb_malloc(new_size);
		SET_VARSIZE(toastedValue, new_size);
		memcpy(VARDATA(toastedValue), VARDATA_SHORT(attr), data_size);
	} else {
		/* Already a plain varlena: return the input as-is, nothing for the caller to free. */
		toastedValue = attr;
		*shouldFree = false;
	}

	return reinterpret_cast<Datum>(toastedValue);
}
|
||
} // namespace quack |
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
duckdb_malloc/duckdb_free are just wrappers around malloc and free; they don't serve an added benefit.
Also, these methods are part of the C API, which we're not using here.