Skip to content

Commit

Permalink
Make a more coherent use of filters and filters_meta in docst…
Browse files Browse the repository at this point in the history
…rings
  • Loading branch information
FrancescAlted committed Feb 12, 2024
1 parent a4120e7 commit 2f8f6ed
Show file tree
Hide file tree
Showing 5 changed files with 30 additions and 27 deletions.
6 changes: 3 additions & 3 deletions README_CFRAME_FORMAT.rst
Original file line number Diff line number Diff line change
Expand Up @@ -60,12 +60,12 @@ The header contains information needed to decompress the Blosc chunks contained
+-- [msgpack] int32

The filter pipeline is stored next in the header. It contains 6 slots, one for each filter that can be applied. For
each slot there is a byte used to store the filter code in `filter_codes` and an associated byte used to store any
possible filter meta-info in `filter_meta`::
each slot there is a byte used to store the filter ID in `filters` and an associated byte used to store any
possible filter meta-info in `filters_meta`::


|-45|-46|-47|-48|-49|-4A|-4B|-4C|-4D|-4E|-4F|-50|-51|-52|-53|-54|-55|-56|
| d2| X | filter_codes |_f4|_f5| filter_meta | | |
| d2| X | filters |_f4|_f5| filters_meta | | |
|---|---|-------------------------------|-------------------------------|
^ ^ ^ ^ ^ ^
| | | | | +-- reserved
Expand Down
21 changes: 11 additions & 10 deletions README_CHUNK_FORMAT.rst
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ Starting in Blosc 2.0.0, there is an extension of the header above that allows
for encoding blocks with a filter pipeline::

1+|-0-|-1-|-2-|-3-|-4-|-5-|-6-|-7-|-8-|-9-|-A-|-B-|-C-|-D-|-E-|-F-|
| filter codes | ^ | ^ | filter meta | ^ | ^ |
| filters | ^ | ^ | filters_meta | ^ | ^ |
| | | |
| +- compcode_meta | +-blosc2_flags
+- user-defined codec +-reserved
Expand Down Expand Up @@ -105,8 +105,8 @@ for encoding blocks with a filter pipeline::
:cbytes:
(``int32``) Compressed size of the buffer (including this header).

:filter_codes:
(``uint8``) Filter code.
:filters:
(``uint8``) Filter ID.

:``0``:
No shuffle (for compatibility with Blosc1).
Expand All @@ -121,11 +121,12 @@ for encoding blocks with a filter pipeline::
:``4``:
Truncate precision filter.
:``5``:
User-defined filter.
Sentinel. IDs larger than this are either globally registered or user-defined filters.

The filter pipeline has 6 reserved slots for the filters. They are applied sequentially to the chunk according
to their index in increasing order. The type of filter applied is specified by the `filter_code`. Each
`filter_code` has an associated field in `filter_meta` that can contain metadata about the filter.
The filter pipeline has 6 reserved slots for the filter IDs. The filters are applied sequentially
to the chunk according to their slot index (in increasing order). The type of filter applied is
specified by the ID. Each ID has an associated field in `filters_meta` that can contain metadata
about the filter.

:udcodec:
(``uint8``) User-defined codec identifier.
Expand All @@ -135,10 +136,10 @@ for encoding blocks with a filter pipeline::

Metadata associated with the compression codec.

:filter_meta:
(``uint8``) Filter metadata.
:filters_meta:
(``uint8``) Filter metadata associated with each filter ID.

Metadata associated with the filter code.
Metadata associated with the filter ID.

:blosc2_flags:
(``bitfield``) The flags for a Blosc2 buffer.
Expand Down
20 changes: 10 additions & 10 deletions blosc/blosc2.c
Original file line number Diff line number Diff line change
Expand Up @@ -660,10 +660,10 @@ typedef struct blosc_header_s {
int32_t blocksize;
int32_t cbytes;
// Extended Blosc2 header
uint8_t filter_codes[BLOSC2_MAX_FILTERS];
uint8_t filters[BLOSC2_MAX_FILTERS];
uint8_t udcompcode;
uint8_t compcode_meta;
uint8_t filter_meta[BLOSC2_MAX_FILTERS];
uint8_t filters_meta[BLOSC2_MAX_FILTERS];
uint8_t reserved2;
uint8_t blosc2_flags;
} blosc_header;
Expand Down Expand Up @@ -739,12 +739,12 @@ int read_chunk_header(const uint8_t* src, int32_t srcsize, bool extended_header,
// The number of filters depends on the version of the header. Blosc2 alpha series
// did not initialize filters to zero beyond the max supported.
if (header->version == BLOSC2_VERSION_FORMAT_ALPHA) {
header->filter_codes[5] = 0;
header->filter_meta[5] = 0;
header->filters[5] = 0;
header->filters_meta[5] = 0;
}
}
else {
flags_to_filters(header->flags, header->filter_codes);
flags_to_filters(header->flags, header->filters);
}
return 0;
}
Expand Down Expand Up @@ -775,11 +775,11 @@ static int blosc2_initialize_context_from_header(blosc2_context* context, blosc_
/* Extended header */
context->header_overhead = BLOSC_EXTENDED_HEADER_LENGTH;

memcpy(context->filters, header->filter_codes, BLOSC2_MAX_FILTERS);
memcpy(context->filters_meta, header->filter_meta, BLOSC2_MAX_FILTERS);
memcpy(context->filters, header->filters, BLOSC2_MAX_FILTERS);
memcpy(context->filters_meta, header->filters_meta, BLOSC2_MAX_FILTERS);
context->compcode_meta = header->compcode_meta;

context->filter_flags = filters_to_flags(header->filter_codes);
context->filter_flags = filters_to_flags(header->filters);
context->special_type = (header->blosc2_flags >> 4) & BLOSC2_SPECIAL_MASK;

is_lazy = (context->blosc2_flags & 0x08u);
Expand Down Expand Up @@ -894,8 +894,8 @@ static int blosc2_intialize_header_from_context(blosc2_context* context, blosc_h
if (extended_header) {
/* Store filter pipeline info at the end of the header */
for (int i = 0; i < BLOSC2_MAX_FILTERS; i++) {
header->filter_codes[i] = context->filters[i];
header->filter_meta[i] = context->filters_meta[i];
header->filters[i] = context->filters[i];
header->filters_meta[i] = context->filters_meta[i];
}
header->udcompcode = context->compcode;
header->compcode_meta = context->compcode_meta;
Expand Down
6 changes: 4 additions & 2 deletions include/blosc2/filters-registry.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,13 @@ enum {
BLOSC_FILTER_BYTEDELTA_BUGGY = 34,
// buggy version. See #524
BLOSC_FILTER_BYTEDELTA = 35,
//!< Byteshuffle + delta. Sometimes this can represent an advantage over
//!< Byteshuffle + delta. The typesize should be specified in the `filters_meta` slot.
//!< Sometimes this can represent an advantage over
//!< @ref BLOSC_SHUFFLE or @ref BLOSC_BITSHUFFLE.
//!< See https://www.blosc.org/posts/bytedelta-enhance-compression-toolset/
BLOSC_FILTER_INT_TRUNC = 36,
//!< Truncate int precision; positive values in `filter_meta` will keep bits; negative values will zero bits.
//!< Truncate int precision; positive values in `filters_meta` slot will keep bits;
//!< negative values will remove (set to zero) bits.
//!< This is similar to @ref BLOSC_TRUNC_PREC, but for integers instead of floating point data.
};

Expand Down
4 changes: 2 additions & 2 deletions tests/test_schunk_header.c
Original file line number Diff line number Diff line change
Expand Up @@ -76,8 +76,8 @@ static char* test_schunk_header(void) {
mu_assert("err clevel", schunk->clevel == 3);
mu_assert("err typesize", schunk->typesize == 4);
mu_assert("err blocksize", schunk->blocksize == 1024 * cparams.typesize);
mu_assert("err filter_meta 1", schunk->filters_meta[0] = 23);
mu_assert("err filter meta 2", schunk->filters_meta[1] = 24);
mu_assert("err 1", schunk->filters_meta[0] = 23);
mu_assert("err filters_meta 2", schunk->filters_meta[1] = 24);
mu_assert("err filters 4", schunk->filters[4] = BLOSC_DELTA);

// Check that the chunks have been decompressed correctly
Expand Down

0 comments on commit 2f8f6ed

Please sign in to comment.