Skip to content

Commit

Permalink
Mas d34 ms.i446 plusplus (#448)
Browse files Browse the repository at this point in the history
* Minor optimisations

Try to reduce the calls to ++, and ensure that where possible the shorter list is being copied.

* Pass Acc into function

So that the list can be accumulated efficiently, without an additional copy to add back the accumulator at the end.

* prepend to accumulators

Code review to make sure we prepend to accumulators everywhere, to reduce the copying involved.

attempt to further optimise in leveled_sst (where most expensive ++ occurs).  This optimises for the case when Acc is [], and enforces a series of '++' to start from the right, prepending in turn.  Some shell testing indicated that this was not necessarily the case (although this doesn't seem to be consistently reproducible).

```
6> element(1, timer:tc(fun() -> KL1 ++ KL2 ++ KL3 ++ KL4 end)).
28
7> element(1, timer:tc(fun() -> KL1 ++ KL2 ++ KL3 ++ KL4 end)).
174
8> element(1, timer:tc(fun() -> KL1 ++ KL2 ++ KL3 ++ KL4 end)).
96
9> element(1, timer:tc(fun() -> KL1 ++ KL2 ++ KL3 ++ KL4 end)).
106
10> element(1, timer:tc(fun() -> KL1 ++ KL2 ++ KL3 ++ KL4 end)).
112

17> element(1, timer:tc(fun() -> lists:foldr(fun(KL0, KLAcc) -> KL0 ++ KLAcc end, [], [KL1, KL2, KL3, KL4]) end)).
21
18> element(1, timer:tc(fun() -> lists:foldr(fun(KL0, KLAcc) -> KL0 ++ KLAcc end, [], [KL1, KL2, KL3, KL4]) end)).
17
19> element(1, timer:tc(fun() -> lists:foldr(fun(KL0, KLAcc) -> KL0 ++ KLAcc end, [], [KL1, KL2, KL3, KL4]) end)).
12
20> element(1, timer:tc(fun() -> lists:foldr(fun(KL0, KLAcc) -> KL0 ++ KLAcc end, [], [KL1, KL2, KL3, KL4]) end)).
11
```

running eprof indicates that '++' and lists:reverse have been reduced (however, the impact had previously been only 1-2%)

* Add unit test to confirm (limited) merit of optimised list function

No difference in unit test with/without inline compilation, so this has been removed

* Update src/leveled_sst.erl

These functions had previously used inline compilation - but this didn't appear to improve performance

Co-authored-by: Thomas Arts <[email protected]>

* Update src/leveled_sst.erl

Co-authored-by: Thomas Arts <[email protected]>

* Update src/leveled_ebloom.erl

Co-authored-by: Thomas Arts <[email protected]>

* Update following review

Also fix code coverage issues

* Update src/leveled_sst.erl

Co-authored-by: Thomas Arts <[email protected]>

---------

Co-authored-by: Thomas Arts <[email protected]>
  • Loading branch information
martinsumner and ThomasArts authored Sep 5, 2024
1 parent 30ec921 commit 5db277b
Show file tree
Hide file tree
Showing 8 changed files with 311 additions and 176 deletions.
56 changes: 35 additions & 21 deletions src/leveled_cdb.erl
Original file line number Diff line number Diff line change
Expand Up @@ -1722,11 +1722,11 @@ build_hashtree_binary(SlotMap, IndexLength) ->

build_hashtree_binary([], IdxLen, SlotPos, Bin) ->
case SlotPos of
IdxLen ->
lists:reverse(Bin);
N when N == IdxLen ->
Bin;
N when N < IdxLen ->
ZeroLen = (IdxLen - N) * 64,
lists:reverse([<<0:ZeroLen>>|Bin])
[<<0:ZeroLen>>|Bin]
end;
build_hashtree_binary([{TopSlot, TopBin}|SlotMapTail], IdxLen, SlotPos, Bin) ->
case TopSlot of
Expand Down Expand Up @@ -1774,21 +1774,23 @@ write_hash_tables([], _HashTree, _CurrPos, _BasePos,
IndexList, HT_BinList, {T1, T2, T3}) ->
leveled_log:log(cdb14, [T1, T2, T3]),
IL = lists:reverse(IndexList),
{IL, list_to_binary(HT_BinList)};
{IL, list_to_binary(lists:reverse(HT_BinList))};
write_hash_tables([Index|Rest], HashTree, CurrPos, BasePos,
IndexList, HT_BinList, Timers) ->
SW1 = os:timestamp(),
SlotMap = to_slotmap(HashTree, Index),
T1 = timer:now_diff(os:timestamp(), SW1) + element(1, Timers),
case SlotMap of
[] ->
write_hash_tables(Rest,
HashTree,
CurrPos,
BasePos,
[{Index, BasePos, 0}|IndexList],
HT_BinList,
Timers);
write_hash_tables(
Rest,
HashTree,
CurrPos,
BasePos,
[{Index, BasePos, 0}|IndexList],
HT_BinList,
Timers
);
_ ->
SW2 = os:timestamp(),
IndexLength = length(SlotMap) * 2,
Expand All @@ -1797,13 +1799,15 @@ write_hash_tables([Index|Rest], HashTree, CurrPos, BasePos,
SW3 = os:timestamp(),
NewSlotBin = build_hashtree_binary(SortedMap, IndexLength),
T3 = timer:now_diff(os:timestamp(), SW3) + element(3, Timers),
write_hash_tables(Rest,
HashTree,
CurrPos + IndexLength * ?DWORD_SIZE,
BasePos,
[{Index, CurrPos, IndexLength}|IndexList],
HT_BinList ++ NewSlotBin,
{T1, T2, T3})
write_hash_tables(
Rest,
HashTree,
CurrPos + IndexLength * ?DWORD_SIZE,
BasePos,
[{Index, CurrPos, IndexLength}|IndexList],
lists:append(NewSlotBin, HT_BinList),
{T1, T2, T3}
)
end.


Expand Down Expand Up @@ -1949,7 +1953,12 @@ build_hashtree_bunchedatend_binary_test() ->
{14, <<15:32, 500:32>>},
{15, <<16:32, 600:32>>},
{15, <<17:32, 700:32>>}],
Bin = list_to_binary(build_hashtree_binary(SlotMap, 16)),
Bin =
list_to_binary(
lists:reverse(
build_hashtree_binary(SlotMap, 16)
)
),
ExpBinP1 = <<16:32, 600:32, 10:32, 0:32, 17:32, 700:32, 0:64>>,
ExpBinP2 = <<11:32, 100:32, 0:192, 12:32, 200:32, 13:32, 300:32, 0:256>>,
ExpBinP3 = <<14:32, 400:32, 15:32, 500:32>>,
Expand All @@ -1965,7 +1974,12 @@ build_hashtree_bunchedatstart_binary_test() ->
{6, <<15:32, 500:32>>},
{7, <<16:32, 600:32>>},
{8, <<17:32, 700:32>>}],
Bin = list_to_binary(build_hashtree_binary(SlotMap, 16)),
Bin =
list_to_binary(
lists:reverse(
build_hashtree_binary(SlotMap, 16)
)
),
ExpBinP1 = <<0:64, 10:32, 0:32, 11:32, 100:32, 12:32, 200:32>>,
ExpBinP2 = <<13:32, 300:32, 14:32, 400:32, 15:32, 500:32, 16:32, 600:32>>,
ExpBinP3 = <<17:32, 700:32, 0:448>>,
Expand All @@ -1988,7 +2002,7 @@ build_hashtree_test() ->
[<<2424915712:32, 300:32>>, <<0:64>>] ++
[<<2424903936:32, 400:32>>, <<2424907008:32, 500:32>>] ++
[<<2424913408:32, 600:32>>],
?assertMatch(ExpOut, BinList).
?assertMatch(ExpOut, lists:reverse(BinList)).


find_firstzero_test() ->
Expand Down
4 changes: 2 additions & 2 deletions src/leveled_ebloom.erl
Original file line number Diff line number Diff line change
Expand Up @@ -92,11 +92,11 @@ check_hash({_SegHash, Hash}, BloomBin) when is_binary(BloomBin)->
map_hashes([], HashListTuple, _SlotCount) ->
HashListTuple;
map_hashes([Hash|Rest], HashListTuple, SlotCount) ->
{Slot, Hashes} = split_hash(element(2, Hash), SlotCount),
{Slot, [H0, H1]} = split_hash(element(2, Hash), SlotCount),
SlotHL = element(Slot + 1, HashListTuple),
map_hashes(
Rest,
setelement(Slot + 1, HashListTuple, Hashes ++ SlotHL),
setelement(Slot + 1, HashListTuple, [H0, H1 | SlotHL]),
SlotCount).

-spec split_hash(external_hash(), slot_count())
Expand Down
20 changes: 11 additions & 9 deletions src/leveled_head.erl
Original file line number Diff line number Diff line change
Expand Up @@ -421,9 +421,9 @@ decode_maybe_binary(<<0, Bin/binary>>) ->
decode_maybe_binary(<<_Other:8, Bin/binary>>) ->
Bin.

-spec diff_index_data([{binary(), index_value()}],
[{binary(), index_value()}]) ->
[{index_op(), binary(), index_value()}].
-spec diff_index_data(
[{binary(), index_value()}], [{binary(), index_value()}]) ->
[{index_op(), binary(), index_value()}].
diff_index_data(OldIndexes, AllIndexes) ->
OldIndexSet = ordsets:from_list(OldIndexes),
AllIndexSet = ordsets:from_list(AllIndexes),
Expand All @@ -435,18 +435,20 @@ diff_specs_core(AllIndexSet, OldIndexSet) ->
RemoveIndexSet =
ordsets:subtract(OldIndexSet, AllIndexSet),
NewIndexSpecs =
assemble_index_specs(ordsets:subtract(NewIndexSet, OldIndexSet),
add),
assemble_index_specs(
ordsets:subtract(NewIndexSet, OldIndexSet),
add
),
RemoveIndexSpecs =
assemble_index_specs(RemoveIndexSet,
remove),
assemble_index_specs(RemoveIndexSet, remove),
NewIndexSpecs ++ RemoveIndexSpecs.

%% @doc Assemble a list of index specs in the
%% form of triplets of the form
%% {IndexOperation, IndexField, IndexValue}.
-spec assemble_index_specs([{binary(), binary()}], index_op()) ->
[{index_op(), binary(), binary()}].
-spec assemble_index_specs(
[{binary(), binary()}], index_op()) ->
[{index_op(), binary(), binary()}].
assemble_index_specs(Indexes, IndexOp) ->
[{IndexOp, Index, Value} || {Index, Value} <- Indexes].

Expand Down
109 changes: 64 additions & 45 deletions src/leveled_pclerk.erl
Original file line number Diff line number Diff line change
Expand Up @@ -217,21 +217,27 @@ merge(SrcLevel, Manifest, RootPath, OptsSST) ->
Src =
leveled_pmanifest:mergefile_selector(Manifest, SrcLevel, SelectMethod),
NewSQN = leveled_pmanifest:get_manifest_sqn(Manifest) + 1,
SinkList = leveled_pmanifest:merge_lookup(Manifest,
SrcLevel + 1,
Src#manifest_entry.start_key,
Src#manifest_entry.end_key),
SinkList =
leveled_pmanifest:merge_lookup(
Manifest,
SrcLevel + 1,
Src#manifest_entry.start_key,
Src#manifest_entry.end_key
),
Candidates = length(SinkList),
leveled_log:log(pc008, [SrcLevel, Candidates]),
case Candidates of
0 ->
NewLevel = SrcLevel + 1,
leveled_log:log(pc009, [Src#manifest_entry.filename, NewLevel]),
leveled_sst:sst_switchlevels(Src#manifest_entry.owner, NewLevel),
Man0 = leveled_pmanifest:switch_manifest_entry(Manifest,
NewSQN,
SrcLevel,
Src),
Man0 =
leveled_pmanifest:switch_manifest_entry(
Manifest,
NewSQN,
SrcLevel,
Src
),
{Man0, []};
_ ->
SST_RP = leveled_penciller:sst_rootpath(RootPath),
Expand All @@ -253,69 +259,82 @@ notify_deletions([Head|Tail], Penciller) ->
%%
%% SrcLevel is the level of the src sst file, the sink should be srcLevel + 1

perform_merge(Manifest,
Src, SinkList, SrcLevel,
RootPath, NewSQN,
OptsSST) ->
perform_merge(Manifest, Src, SinkList, SrcLevel, RootPath, NewSQN, OptsSST) ->
leveled_log:log(pc010, [Src#manifest_entry.filename, NewSQN]),
SrcList = [{next, Src, all}],
MaxSQN = leveled_sst:sst_getmaxsequencenumber(Src#manifest_entry.owner),
SinkLevel = SrcLevel + 1,
SinkBasement = leveled_pmanifest:is_basement(Manifest, SinkLevel),
Additions =
do_merge(SrcList, SinkList,
SinkLevel, SinkBasement,
RootPath, NewSQN, MaxSQN,
OptsSST,
[]),
do_merge(
SrcList, SinkList,
SinkLevel, SinkBasement,
RootPath, NewSQN, MaxSQN,
OptsSST,
[]
),
RevertPointerFun =
fun({next, ME, _SK}) ->
ME
end,
SinkManifestList = lists:map(RevertPointerFun, SinkList),
Man0 = leveled_pmanifest:replace_manifest_entry(Manifest,
NewSQN,
SinkLevel,
SinkManifestList,
Additions),
Man2 = leveled_pmanifest:remove_manifest_entry(Man0,
NewSQN,
SrcLevel,
Src),
Man0 =
leveled_pmanifest:replace_manifest_entry(
Manifest,
NewSQN,
SinkLevel,
SinkManifestList,
Additions
),
Man2 =
leveled_pmanifest:remove_manifest_entry(
Man0,
NewSQN,
SrcLevel,
Src
),
{Man2, [Src|SinkManifestList]}.

do_merge([], [], SinkLevel, _SinkB, _RP, NewSQN, _MaxSQN, _Opts, Additions) ->
leveled_log:log(pc011, [NewSQN, SinkLevel, length(Additions)]),
Additions;
lists:reverse(Additions);
do_merge(KL1, KL2, SinkLevel, SinkB, RP, NewSQN, MaxSQN, OptsSST, Additions) ->
FileName = leveled_penciller:sst_filename(NewSQN,
SinkLevel,
length(Additions)),
FileName =
leveled_penciller:sst_filename(
NewSQN, SinkLevel, length(Additions)
),
leveled_log:log(pc012, [NewSQN, FileName, SinkB]),
TS1 = os:timestamp(),
case leveled_sst:sst_newmerge(RP, FileName,
KL1, KL2, SinkB, SinkLevel, MaxSQN,
OptsSST) of
empty ->
leveled_log:log(pc013, [FileName]),
do_merge([], [],
SinkLevel, SinkB,
RP, NewSQN, MaxSQN,
OptsSST,
Additions);
do_merge(
[], [],
SinkLevel, SinkB,
RP, NewSQN, MaxSQN,
OptsSST,
Additions
);
{ok, Pid, Reply, Bloom} ->
{{KL1Rem, KL2Rem}, SmallestKey, HighestKey} = Reply,
Entry = #manifest_entry{start_key=SmallestKey,
end_key=HighestKey,
owner=Pid,
filename=FileName,
bloom=Bloom},
Entry =
#manifest_entry{
start_key=SmallestKey,
end_key=HighestKey,
owner=Pid,
filename=FileName,
bloom=Bloom
},
leveled_log:log_timer(pc015, [], TS1),
do_merge(KL1Rem, KL2Rem,
SinkLevel, SinkB,
RP, NewSQN, MaxSQN,
OptsSST,
Additions ++ [Entry])
do_merge(
KL1Rem, KL2Rem,
SinkLevel, SinkB,
RP, NewSQN, MaxSQN,
OptsSST,
[Entry|Additions]
)
end.

-spec grooming_scorer(
Expand Down
3 changes: 3 additions & 0 deletions src/leveled_penciller.erl
Original file line number Diff line number Diff line change
Expand Up @@ -2002,6 +2002,9 @@ format_status_test() ->
?assertMatch(redacted, ST#state.levelzero_cache),
?assertMatch(redacted, ST#state.levelzero_index),
?assertMatch(redacted, ST#state.levelzero_astree),
NormStatus = format_status(#{reason => normal, state => S}),
NST = maps:get(state, NormStatus),
?assert(is_integer(array:size(element(2, NST#state.manifest)))),
clean_testdir(RootPath).

close_no_crash_test_() ->
Expand Down
Loading

0 comments on commit 5db277b

Please sign in to comment.