Skip to content

Commit

Permalink
snapshots: move encoding modules to common (#2374)
Browse files Browse the repository at this point in the history
  • Loading branch information
canepat authored Sep 23, 2024
1 parent 9205e8f commit 33c882a
Show file tree
Hide file tree
Showing 12 changed files with 26 additions and 24 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -58,16 +58,17 @@
#include <silkworm/core/common/base.hpp>
#include <silkworm/core/common/bytes.hpp>
#include <silkworm/core/common/endian.hpp>
-#include <silkworm/db/snapshots/rec_split/common/common.hpp>
-#include <silkworm/db/snapshots/rec_split/encoding/sequence.hpp>
#include <silkworm/infra/common/ensure.hpp>
#include <silkworm/infra/common/log.hpp>

+#include "sequence.hpp"
+#include "util.hpp"

// EliasFano algo overview https://www.antoniomallia.it/sorted-integers-compression-with-elias-fano-encoding.html
// P. Elias. Efficient storage and retrieval by content and address of static files. J. ACM, 21(2):246–260, 1974.
// Partitioned Elias-Fano Indexes http://groups.di.unipi.it/~ottavian/files/elias_fano_sigir14.pdf

-namespace silkworm::snapshots::rec_split::encoding {
+namespace silkworm::snapshots::encoding {

//! Log2Q = Log2(Quantum)
static constexpr uint64_t kLog2q = 8;
Expand Down Expand Up @@ -569,4 +570,4 @@ class DoubleEliasFanoList16 {
}
};

-} // namespace silkworm::snapshots::rec_split::encoding
+} // namespace silkworm::snapshots::encoding
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
#include <silkworm/core/common/util.hpp>
#include <silkworm/infra/test_util/log.hpp>

-namespace silkworm::snapshots::rec_split::encoding {
+namespace silkworm::snapshots::encoding {

struct EliasFanoList32Test {
std::vector<uint64_t> offsets;
Expand Down Expand Up @@ -168,4 +168,4 @@ TEST_CASE("DoubleEliasFanoList16", "[silkworm][recsplit][elias_fano]") {
"0000000000000000010000000000000000000000000000000000000000000000"));
}

-} // namespace silkworm::snapshots::rec_split::encoding
+} // namespace silkworm::snapshots::encoding
Original file line number Diff line number Diff line change
Expand Up @@ -50,11 +50,12 @@
#include <iostream>

#include <silkworm/core/common/assert.hpp>
-#include <silkworm/db/snapshots/rec_split/common/common.hpp>
-#include <silkworm/db/snapshots/rec_split/encoding/sequence.hpp>
#include <silkworm/infra/common/log.hpp>

-namespace silkworm::snapshots::rec_split::encoding {
+#include "sequence.hpp"
+#include "util.hpp"
+
+namespace silkworm::snapshots::encoding {

//! Storage for Golomb-Rice codes of a RecSplit bucket.
class GolombRiceVector {
Expand Down Expand Up @@ -253,4 +254,4 @@ class GolombRiceVector {
}
};

-} // namespace silkworm::snapshots::rec_split::encoding
+} // namespace silkworm::snapshots::encoding
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,9 @@
#include <catch2/catch_test_macros.hpp>

#include <silkworm/core/common/random_number.hpp>
-#include <silkworm/db/snapshots/rec_split/encoding/sequence.hpp>
#include <silkworm/infra/test_util/log.hpp>

-namespace silkworm::snapshots::rec_split::encoding {
+namespace silkworm::snapshots::encoding {

static const std::size_t kGolombRiceTestNumKeys{128};
static const std::size_t kGolombRiceTestNumTrees{1'000};
Expand Down Expand Up @@ -86,4 +85,4 @@ TEST_CASE("GolombRiceVector", "[silkworm][recsplit][golomb_rice]") {
}
}

-} // namespace silkworm::snapshots::rec_split::encoding
+} // namespace silkworm::snapshots::encoding
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
#include <silkworm/core/common/endian.hpp>
#include <silkworm/infra/common/ensure.hpp>

-namespace silkworm::snapshots::rec_split::encoding {
+namespace silkworm::snapshots::encoding {

template <UnsignedIntegral T>
using UnsignedIntegralSequence = std::vector<T>;
Expand Down Expand Up @@ -64,4 +64,4 @@ std::istream& operator>>(std::istream& is, UnsignedIntegralSequence<T>& s) {
return is;
}

-} // namespace silkworm::snapshots::rec_split::encoding
+} // namespace silkworm::snapshots::encoding
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
#include <silkworm/core/common/endian.hpp>
#include <silkworm/infra/test_util/log.hpp>

-namespace silkworm::snapshots::rec_split::encoding {
+namespace silkworm::snapshots::encoding {

TEST_CASE("Uint64Sequence", "[silkworm][snapshots][recsplit][sequence]") {
test_util::SetLogVerbosityGuard guard{log::Level::kNone};
Expand All @@ -52,4 +52,4 @@ TEST_CASE("Uint64Sequence: size too big", "[silkworm][snapshots][recsplit][seque
CHECK_THROWS_AS((ss >> input_sequence), std::logic_error);
}

-} // namespace silkworm::snapshots::rec_split::encoding
+} // namespace silkworm::snapshots::encoding
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@

#include <silkworm/core/common/assert.hpp>

-namespace silkworm::snapshots::rec_split {
+namespace silkworm::snapshots::encoding {

using std::memcpy;

Expand Down Expand Up @@ -206,4 +206,4 @@ inline uint64_t select64(uint64_t x, uint64_t k) {
#endif
}

-} // namespace silkworm::snapshots::rec_split
+} // namespace silkworm::snapshots::encoding
4 changes: 2 additions & 2 deletions silkworm/db/snapshots/index/btree_index.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,13 @@

#include <silkworm/infra/common/memory_mapped_file.hpp>

-#include "../rec_split/encoding/elias_fano.hpp" // TODO(canepat) move to snapshots/common
+#include "../common/encoding/elias_fano.hpp"
#include "../seg/decompressor.hpp"
#include "btree.hpp"

namespace silkworm::snapshots::index {

-using rec_split::encoding::EliasFanoList32; // TODO(canepat) remove after moving
+using encoding::EliasFanoList32;

class BTreeIndex {
public:
Expand Down
7 changes: 4 additions & 3 deletions silkworm/db/snapshots/rec_split/rec_split.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -72,9 +72,9 @@
#include <silkworm/core/common/math.hpp>
#include <silkworm/core/common/util.hpp>
#include <silkworm/db/snapshots/common/bitmask_operators.hpp>
-#include <silkworm/db/snapshots/rec_split/common/murmur_hash3.hpp>
-#include <silkworm/db/snapshots/rec_split/encoding/elias_fano.hpp>
-#include <silkworm/db/snapshots/rec_split/encoding/golomb_rice.hpp>
+#include <silkworm/db/snapshots/common/encoding/elias_fano.hpp>
+#include <silkworm/db/snapshots/common/encoding/golomb_rice.hpp>
+#include <silkworm/db/snapshots/rec_split/murmur_hash3.hpp>
#include <silkworm/infra/common/directories.hpp>
#include <silkworm/infra/common/ensure.hpp>
#include <silkworm/infra/common/log.hpp>
Expand All @@ -87,6 +87,7 @@
namespace silkworm::snapshots::rec_split {

using namespace std::chrono;
+using encoding::remap16, encoding::remap128;

//! Assumed *maximum* size of a bucket. Works with high probability up to average bucket size ~2000
static const int kMaxBucketSize = 3000;
Expand Down

0 comments on commit 33c882a

Please sign in to comment.