Skip to content

Commit

Permalink
chore: refactor compact_object and introduce materialize method (#3300)
Browse files Browse the repository at this point in the history
  • Loading branch information
romange authored Jul 10, 2024
1 parent 21620ef commit 038d081
Show file tree
Hide file tree
Showing 2 changed files with 62 additions and 46 deletions.
104 changes: 58 additions & 46 deletions src/core/compact_object.cc
Original file line number Diff line number Diff line change
Expand Up @@ -761,52 +761,7 @@ void CompactObj::SetString(std::string_view str) {
}
}

DCHECK_GT(str.size(), kInlineLen);

string_view encoded = str;
bool is_ascii = kUseAsciiEncoding && detail::validate_ascii_fast(str.data(), str.size());

if (is_ascii) {
size_t encode_len = binpacked_len(str.size());
size_t rev_len = ascii_len(encode_len);

if (rev_len == str.size()) {
mask |= ASCII2_ENC_BIT; // str hits its highest bound.
} else {
CHECK_EQ(str.size(), rev_len - 1) << "Bad ascii encoding for len " << str.size();

mask |= ASCII1_ENC_BIT;
}

tl.tmp_buf.resize(encode_len);
detail::ascii_pack_simd2(str.data(), str.size(), tl.tmp_buf.data());
encoded = string_view{reinterpret_cast<char*>(tl.tmp_buf.data()), encode_len};

if (encoded.size() <= kInlineLen) {
SetMeta(encoded.size(), mask);
detail::ascii_pack(str.data(), str.size(), reinterpret_cast<uint8_t*>(u_.inline_str));

return;
}
}

if (kUseSmallStrings && SmallString::CanAllocate(encoded.size())) {
if (taglen_ == 0) {
SetMeta(SMALL_TAG, mask);
tl.small_str_bytes += u_.small_str.Assign(encoded);
return;
}

if (taglen_ == SMALL_TAG && encoded.size() <= u_.small_str.size()) {
mask_ = mask;
tl.small_str_bytes -= u_.small_str.MallocUsed();
tl.small_str_bytes += u_.small_str.Assign(encoded);
return;
}
}

SetMeta(ROBJ_TAG, mask);
u_.r_obj.SetString(encoded, tl.local_mr);
EncodeString(str);
}

string_view CompactObj::GetSlice(string* scratch) const {
Expand Down Expand Up @@ -1000,6 +955,13 @@ std::pair<size_t, size_t> CompactObj::GetExternalSlice() const {
return pair<size_t, size_t>(offset, size_t(u_.ext_ptr.size));
}

// Turns an external entry into an in-memory string holding `str` by delegating to
// EncodeString. Aborts via CHECK if this object is not external or if `str` is not
// longer than 20 characters.
void CompactObj::Materialize(std::string_view str) {
// Only an entry that is currently external may be materialized.
CHECK(IsExternal());
// NOTE(review): 20 is a bare literal, while EncodeString debug-checks against kInlineLen —
// confirm whether the two thresholds are meant to be related.
CHECK_GT(str.size(), 20u);

EncodeString(str);
}

void CompactObj::Reset() {
if (HasAllocated()) {
Free();
Expand Down Expand Up @@ -1174,6 +1136,56 @@ bool CompactObj::CmpEncoded(string_view sv) const {
return false;
}

// Stores `str` in this object using the most compact representation available.
//
// Precondition (debug-checked): str.size() > kInlineLen.
//
// If kUseAsciiEncoding is on and `str` passes validate_ascii_fast, the string is bit-packed
// and exactly one of ASCII1_ENC_BIT / ASCII2_ENC_BIT is set in the mask:
//   - ASCII2_ENC_BIT when ascii_len(packed length) equals str.size();
//   - ASCII1_ENC_BIT when it is one larger (DecodedLen subtracts 1 in that case).
// The resulting bytes are then placed, in order of preference, into the inline buffer
// (only reachable for packed strings), a small string, or r_obj.
void CompactObj::EncodeString(string_view str) {
  DCHECK_GT(str.size(), kInlineLen);

  // Start from the current flags with the bits covered by kEncMask cleared.
  uint8_t mask = mask_ & ~kEncMask;
  string_view encoded = str;
  const bool is_ascii = kUseAsciiEncoding && detail::validate_ascii_fast(str.data(), str.size());

  if (is_ascii) {
    const size_t encode_len = binpacked_len(str.size());
    const size_t rev_len = ascii_len(encode_len);

    if (rev_len == str.size()) {
      mask |= ASCII2_ENC_BIT;  // str hits its highest bound.
    } else {
      CHECK_EQ(str.size(), rev_len - 1) << "Bad ascii encoding for len " << str.size();

      mask |= ASCII1_ENC_BIT;
    }

    // Decide on the inline case before touching the scratch buffer: the packed bytes are
    // written directly into the inline storage, so packing into tl.tmp_buf first would be
    // work that is immediately thrown away.
    if (encode_len <= kInlineLen) {
      SetMeta(encode_len, mask);
      detail::ascii_pack(str.data(), str.size(), reinterpret_cast<uint8_t*>(u_.inline_str));

      return;
    }

    // Too large for inline storage: pack into the thread-local scratch buffer and let the
    // allocation paths below copy from it.
    tl.tmp_buf.resize(encode_len);
    detail::ascii_pack_simd2(str.data(), str.size(), tl.tmp_buf.data());
    encoded = string_view{reinterpret_cast<char*>(tl.tmp_buf.data()), encode_len};
  }

  if (kUseSmallStrings && SmallString::CanAllocate(encoded.size())) {
    // taglen_ == 0: switch to a small string and account for the bytes it assigns.
    if (taglen_ == 0) {
      SetMeta(SMALL_TAG, mask);
      tl.small_str_bytes += u_.small_str.Assign(encoded);
      return;
    }

    // Already a small string at least as large as the new payload: assign into it, replacing
    // its previous contribution to the thread-local byte counter with the new one.
    if (taglen_ == SMALL_TAG && encoded.size() <= u_.small_str.size()) {
      mask_ = mask;
      tl.small_str_bytes -= u_.small_str.MallocUsed();
      tl.small_str_bytes += u_.small_str.Assign(encoded);
      return;
    }
  }

  // Fallback: store the bytes in r_obj using the thread-local memory resource.
  SetMeta(ROBJ_TAG, mask);
  u_.r_obj.SetString(encoded, tl.local_mr);
}

// Returns the length of the decoded string for an encoded payload of `sz` bytes:
// ascii_len(sz), less one when ASCII1_ENC_BIT is set in mask_.
size_t CompactObj::DecodedLen(size_t sz) const {
  const auto upper_bound = ascii_len(sz);

  if (mask_ & ASCII1_ENC_BIT) {
    return upper_bound - 1;
  }

  return upper_bound;
}
Expand Down
4 changes: 4 additions & 0 deletions src/core/compact_object.h
Original file line number Diff line number Diff line change
Expand Up @@ -325,6 +325,9 @@ class CompactObj {
void SetExternal(size_t offset, size_t sz);
std::pair<size_t, size_t> GetExternalSlice() const;

// The opposite of SetExternal: changes the external entry to be an in-memory string
// holding str. CHECK-fails unless the object is currently external and str is longer
// than 20 characters.
void Materialize(std::string_view str);

// In case this object is a single blob, returns the number of bytes allocated on the heap
// for that blob. Otherwise returns 0.
size_t MallocUsed() const;
Expand Down Expand Up @@ -374,6 +377,7 @@ class CompactObj {
}

private:
// Stores str in this object, bit-packing it when it is valid ASCII and ASCII encoding is
// enabled. Requires str.size() > kInlineLen (debug-checked in the definition).
void EncodeString(std::string_view str);
size_t DecodedLen(size_t sz) const;

bool EqualNonInline(std::string_view sv) const;
Expand Down

0 comments on commit 038d081

Please sign in to comment.