From 9a68c4b85c700dfe5e9c67342b04247e9508a59a Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Thu, 14 Nov 2024 15:43:22 +0000 Subject: [PATCH] Support sub-key queries Also requires a refactoring of types. In head-only mode - the metadata in the ledger is just the value, and the value can be anything. So metadata() definition needs to reflect that. There are then issues with appdefined functions for extracting metadata. In theory an appdefined function could extract some unsupported type. So made explicit that the appdefined function must extract std_metadata() as metadata - otherwise functionality will not work. This means that if it is an object key, that is not a ?HEAD key, then the Metadata must be a tuple (of either Riak or Standard type). --- src/leveled_bookie.erl | 4 +- src/leveled_codec.erl | 12 ++++-- src/leveled_head.erl | 73 ++++++++++++++++---------------- src/leveled_runner.erl | 4 +- src/leveled_sst.erl | 2 +- test/end_to_end/tictac_SUITE.erl | 72 ++++++++++++++++++++++++++++++- 6 files changed, 121 insertions(+), 46 deletions(-) diff --git a/src/leveled_bookie.erl b/src/leveled_bookie.erl index 4aee189a..e4292e1a 100644 --- a/src/leveled_bookie.erl +++ b/src/leveled_bookie.erl @@ -2438,13 +2438,13 @@ recalcfor_ledgercache( not_present; {LK, LV} -> case leveled_codec:get_metadata(LV) of - MDO when MDO =/= null -> + MDO when is_tuple(MDO) -> MDO end end, UpdMetadata = case leveled_codec:get_metadata(MetaValue) of - MDU when MDU =/= null -> + MDU when is_tuple(MDU) -> MDU end, IdxSpecs = diff --git a/src/leveled_codec.erl b/src/leveled_codec.erl index 44e65ae0..67e54f94 100644 --- a/src/leveled_codec.erl +++ b/src/leveled_codec.erl @@ -73,8 +73,9 @@ -type segment_hash() :: % hash of the key to an aae segment - to be used in ledger filters {integer(), integer()}|no_lookup. +-type head_value() :: any(). -type metadata() :: - tuple()|null. % null for empty metadata + tuple()|null|head_value(). 
% null for empty metadata -type last_moddate() :: % modified date as determined by the object (not this store) % if the object has siblings in the store will be the maximum of those @@ -177,7 +178,8 @@ regular_expression/0, value_fetcher/0, proxy_object/0, - slimmed_key/0 + slimmed_key/0, + head_value/0 ]). @@ -428,6 +430,8 @@ to_querykey(Bucket, Key, Tag, Field, Value) when Tag == ?IDX_TAG -> -spec to_querykey(key()|null, key()|null, tag()) -> query_key(). %% @doc %% Convert something into a ledger query key +to_querykey(Bucket, {Key, SubKey}, Tag) -> + {Tag, Bucket, Key, SubKey}; to_querykey(Bucket, Key, Tag) -> {Tag, Bucket, Key, null}. @@ -781,7 +785,7 @@ gen_headspec( -spec return_proxy (leveled_head:headonly_tag(), leveled_head:object_metadata(), null, journal_ref()) - -> leveled_head:object_metadata(); + -> head_value(); (leveled_head:object_tag(), leveled_head:object_metadata(), pid(), journal_ref()) -> proxy_objectbin(). %% @doc @@ -872,7 +876,7 @@ get_size(PK, Value) -> -spec get_keyandobjhash(tuple(), tuple()) -> tuple(). %% @doc -%% Return a tucple of {Bucket, Key, Hash} where hash is a hash of the object +%% Return a tuple of {Bucket, Key, Hash} where hash is a hash of the object %% not the key (for example with Riak tagged objects this will be a hash of %% the sorted vclock) get_keyandobjhash(LK, Value) -> diff --git a/src/leveled_head.erl b/src/leveled_head.erl index 64c0e19c..5e158ad9 100644 --- a/src/leveled_head.erl +++ b/src/leveled_head.erl @@ -49,29 +49,31 @@ -type headonly_tag() :: ?HEAD_TAG. % Tag assigned to head_only objects. Behaviour cannot be changed --type riak_metadata() :: {binary()|delete, - % Sibling Metadata - binary()|null, - % Vclock Metadata - non_neg_integer()|null, - % Hash of vclock - non-exportable - non_neg_integer() - % Size in bytes of real object - }. 
--type std_metadata() :: {non_neg_integer()|null, - % Hash of value - non_neg_integer(), - % Size in bytes of real object - list(tuple())|undefined - % User-define metadata - }. --type head_metadata() :: {non_neg_integer()|null, - % Hash of value - non_neg_integer() - % Size in bytes of real object - }. - --type object_metadata() :: riak_metadata()|std_metadata()|head_metadata(). +-type riak_metadata() :: + { + binary()|delete, + % Sibling Metadata + binary()|null, + % Vclock Metadata + non_neg_integer()|null, + % Hash of vclock - non-exportable + non_neg_integer() + % Size in bytes of real object + }. +-type std_metadata() :: + { + non_neg_integer()|null, + % Hash of value + non_neg_integer(), + % Size in bytes of real object + list(tuple())|undefined + % User-define metadata + }. + % std_metadata() must be outputted as the metadata format by any + % app-defined function +-type head_metadata() :: leveled_codec:head_value(). + +-type object_metadata() :: riak_metadata()|std_metadata(). -type appdefinable_function() :: key_to_canonicalbinary | build_head | extract_metadata | diff_indexspecs. @@ -80,12 +82,12 @@ -type appdefinable_keyfun() :: fun((tuple()) -> binary()). -type appdefinable_headfun() :: - fun((object_tag(), object_metadata()) -> head()). + fun((object_tag(), std_metadata()) -> head()). -type appdefinable_metadatafun() :: fun((leveled_codec:tag(), non_neg_integer(), binary()|delete) -> - {object_metadata(), list(erlang:timestamp())}). + {std_metadata(), list(erlang:timestamp())}). -type appdefinable_indexspecsfun() :: - fun((object_tag(), object_metadata(), object_metadata()|not_present) -> + fun((object_tag(), std_metadata(), std_metadata()|not_present) -> leveled_codec:index_specs()). -type appdefinable_function_fun() :: appdefinable_keyfun() | appdefinable_headfun() | @@ -96,12 +98,7 @@ -type index_op() :: add | remove. -type index_value() :: integer() | binary(). --type head() :: - binary()|tuple(). 
- % TODO: - % This is currently not always a binary. Wish is to migrate this so that - % it is predictably a binary - +-type head() :: binary()|tuple()|head_metadata(). -export_type([object_tag/0, headonly_tag/0, @@ -143,7 +140,9 @@ default_key_to_canonicalbinary(Key) -> leveled_util:t2b(Key). --spec build_head(object_tag()|headonly_tag(), object_metadata()) -> head(). +-spec build_head + (object_tag(), object_metadata()) -> head(); + (headonly_tag(), head_metadata()) -> head() . %% @doc %% Return the object metadata as a binary to be the "head" of the object build_head(?HEAD_TAG, Value) -> @@ -253,22 +252,22 @@ default_reload_strategy(Tag) -> {Tag, retain}. -spec get_size( - object_tag()|headonly_tag(), object_metadata()) -> non_neg_integer(). + object_tag(), object_metadata()) -> non_neg_integer(). %% @doc %% Fetch the size from the metadata get_size(?RIAK_TAG, {_, _, _, Size}) -> Size; -get_size(_Tag, {_, Size, _}) -> +get_size(Tag, {_, Size, _}) when Tag =/= ?HEAD_TAG-> Size. -spec get_hash( - object_tag()|headonly_tag(), object_metadata()) -> non_neg_integer()|null. + object_tag(), object_metadata()) -> non_neg_integer()|null. %% @doc %% Fetch the hash from the metadata get_hash(?RIAK_TAG, {_, _, Hash, _}) -> Hash; -get_hash(_Tag, {Hash, _, _}) -> +get_hash(Tag, {Hash, _, _}) when Tag =/= ?HEAD_TAG -> Hash. -spec standard_hash(any()) -> non_neg_integer(). 
diff --git a/src/leveled_runner.erl b/src/leveled_runner.erl index 9737162f..b1c3f219 100644 --- a/src/leveled_runner.erl +++ b/src/leveled_runner.erl @@ -636,7 +636,9 @@ accumulate_objects(FoldObjectsFun, InkerClone, Tag, DeferredFetch) -> end, JK = {leveled_codec:to_objectkey(B, K, Tag), SQN}, case DeferredFetch of - {true, JournalCheck} when MD =/= null -> + {true, false} when Tag == ?HEAD_TAG -> + FoldObjectsFun(B, K, MD, Acc); + {true, JournalCheck} when is_tuple(MD) -> ProxyObj = leveled_codec:return_proxy(Tag, MD, InkerClone, JK), case {JournalCheck, InkerClone} of diff --git a/src/leveled_sst.erl b/src/leveled_sst.erl index 8aa21a9b..1a5b42df 100644 --- a/src/leveled_sst.erl +++ b/src/leveled_sst.erl @@ -3377,7 +3377,7 @@ generate_randomkeys(Seqn, Count, Acc, BucketLow, BRange) -> Chunk = crypto:strong_rand_bytes(64), MV = leveled_codec:convert_to_ledgerv(LK, Seqn, Chunk, 64, infinity), MD = element(4, MV), - MD =/= null orelse error(bad_type), + is_tuple(MD) orelse error(bad_type), ?assertMatch(undefined, element(3, MD)), MD0 = [{magic_md, [<<0:32/integer>>, base64:encode(Chunk)]}], MV0 = setelement(4, MV, setelement(3, MD, MD0)), diff --git a/test/end_to_end/tictac_SUITE.erl b/test/end_to_end/tictac_SUITE.erl index 69b4f8d9..4869184b 100644 --- a/test/end_to_end/tictac_SUITE.erl +++ b/test/end_to_end/tictac_SUITE.erl @@ -2,6 +2,7 @@ -include("leveled.hrl"). -export([all/0, init_per_suite/1, end_per_suite/1]). -export([ + multiput_subkeys/1, many_put_compare/1, index_compare/1, basic_headonly/1, @@ -9,6 +10,7 @@ ]). all() -> [ + multiput_subkeys, many_put_compare, index_compare, basic_headonly, @@ -25,8 +27,76 @@ init_per_suite(Config) -> end_per_suite(Config) -> testutil:end_per_suite(Config). -many_put_compare(_Config) -> + +multiput_subkeys(_Config) -> + multiput_subkeys_byvalue({null, 0}), + multiput_subkeys_byvalue(null), + multiput_subkeys_byvalue(<<"binaryValue">>). 
+ +multiput_subkeys_byvalue(V) -> + RootPath = testutil:reset_filestructure("subkeyTest"), + StartOpts = [{root_path, RootPath}, + {max_journalsize, 10000000}, + {max_pencillercachesize, 12000}, + {head_only, no_lookup}, + {sync_strategy, testutil:sync_strategy()}], + {ok, Bookie} = leveled_bookie:book_start(StartOpts), + SubKeyCount = 200000, + + B = {<<"MultiBucketType">>, <<"MultiBucket">>}, + ObjSpecLGen = + fun(K) -> + lists:map( + fun(I) -> + {add, v1, B, K, <<I:32/integer>>, [os:timestamp()], V} + end, + lists:seq(1, SubKeyCount) + ) + end, + SpecL1 = ObjSpecLGen(<<1:32/integer>>), + load_objectspecs(SpecL1, 32, Bookie), + SpecL2 = ObjSpecLGen(<<2:32/integer>>), + load_objectspecs(SpecL2, 32, Bookie), + SpecL3 = ObjSpecLGen(<<3:32/integer>>), + load_objectspecs(SpecL3, 32, Bookie), + SpecL4 = ObjSpecLGen(<<4:32/integer>>), + load_objectspecs(SpecL4, 32, Bookie), + SpecL5 = ObjSpecLGen(<<5:32/integer>>), + load_objectspecs(SpecL5, 32, Bookie), + + FoldFun = + fun(Bucket, {Key, SubKey}, _Value, Acc) -> + case Bucket of + Bucket when Bucket == B -> + [{Key, SubKey}|Acc] + end + end, + QueryFun = + fun(KeyRange) -> + Range = {range, B, KeyRange}, + {async, R} = + leveled_bookie:book_headfold( + Bookie, ?HEAD_TAG, Range, {FoldFun, []}, false, true, false + ), + L = length(R()), + io:format("query result for range ~p is ~w~n", [Range, L]), + L + end, + + KR1 = {{<<1:32/integer>>, <<>>}, {<<2:32/integer>>, <<>>}}, + KR2 = {{<<3:32/integer>>, <<>>}, {<<5:32/integer>>, <<>>}}, + KR3 = + { + {<<1:32/integer>>, <<10:32/integer>>}, + {<<2:32/integer>>, <<19:32/integer>>} + }, + true = SubKeyCount == QueryFun(KR1), + true = (SubKeyCount * 2) == QueryFun(KR2), + true = (SubKeyCount + 10) == QueryFun(KR3), + leveled_bookie:book_destroy(Bookie). + +many_put_compare(_Config) -> TreeSize = small, SegmentCount = 256 * 256, % Test requires multiple different databases, so want to mount them all