From 9058ba248cb03583de1c571cd644c0058a040d06 Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Tue, 9 Jul 2024 20:14:27 +0100 Subject: [PATCH 1/6] Make tictac more efficient by making level1 a map Pre-change (1M keys, tree size large): Generating Keys took 2513 milliseconds Memory footprint [{total,356732576},{processes,334051328},{processes_used,334044488},{system,22681248},{atom,540873},{atom_used,524383},{binary,1015120},{code,9692859},{ets,721496}] Generating new tree took 1 milliseconds Loading tree took 27967 milliseconds Memory footprint [{total,36733040},{processes,8875472},{processes_used,8875048},{system,27857568},{atom,540873},{atom_used,524449},{binary,6236480},{code,9692859},{ets,721496}] Exporting tree took 434 milliseconds Importing tree took 100 milliseconds Memory footprint [{total,155941512},{processes,123734808},{processes_used,123734384},{system,32206704},{atom,540873},{atom_used,524449},{binary,10401144},{code,9692859},{ets,721496}] Garbage collect Memory footprint [{total,39660504},{processes,8257520},{processes_used,8256968},{system,31402984},{atom,540873},{atom_used,524449},{binary,9781760},{code,9692859},{ets,721496}] Post change: Generating Keys took 2416 milliseconds Memory footprint [{total,284678120},{processes,258349528},{processes_used,257758568},{system,26328592},{atom,893161},{atom_used,878150},{binary,1013880},{code,11770188},{ets,774224}] Generating new tree took 0 milliseconds Loading tree took 2072 milliseconds Memory footprint [{total,49957448},{processes,17244856},{processes_used,16653896},{system,32712592},{atom,893161},{atom_used,878216},{binary,7397496},{code,11770188},{ets,774224}] Exporting tree took 448 milliseconds Importing tree took 108 milliseconds Memory footprint [{total,46504880},{processes,11197344},{processes_used,10606384},{system,35307536},{atom,893161},{atom_used,878216},{binary,9992112},{code,11770188},{ets,774224}] Garbage collect Memory footprint 
[{total,47394048},{processes,12223608},{processes_used,11632520},{system,35170440},{atom,893161},{atom_used,878216},{binary,9855008},{code,11770188},{ets,774224}] --- src/leveled_penciller.erl | 8 +- src/leveled_tictac.erl | 157 ++++++++++++++++++++++++++++++++------ 2 files changed, 135 insertions(+), 30 deletions(-) diff --git a/src/leveled_penciller.erl b/src/leveled_penciller.erl index eeb358fa..4c9e7cbb 100644 --- a/src/leveled_penciller.erl +++ b/src/leveled_penciller.erl @@ -159,7 +159,7 @@ -behaviour(gen_server). --include("include/leveled.hrl"). +-include("leveled.hrl"). -export([ init/1, @@ -207,23 +207,17 @@ -export([clean_testdir/1]). -endif. --define(MAX_WORK_WAIT, 300). -define(MANIFEST_FP, "ledger_manifest"). -define(FILES_FP, "ledger_files"). --define(CURRENT_FILEX, "crr"). --define(PENDING_FILEX, "pnd"). -define(SST_FILEX, ".sst"). -define(ARCHIVE_FILEX, ".bak"). -define(SUPER_MAX_TABLE_SIZE, 40000). --define(PROMPT_WAIT_ONL0, 5). -define(WORKQUEUE_BACKLOG_TOLERANCE, 4). -define(COIN_SIDECOUNT, 4). -define(SLOW_FETCH, 500000). % Log a very slow fetch - longer than 500ms -define(FOLD_SCANWIDTH, 32). -define(ITERATOR_SCANWIDTH, 4). -define(ITERATOR_MINSCANWIDTH, 1). --define(TIMING_SAMPLECOUNTDOWN, 10000). --define(TIMING_SAMPLESIZE, 100). -define(SHUTDOWN_LOOPS, 10). -define(SHUTDOWN_PAUSE, 10000). % How long to wait for snapshots to be released on shutdown diff --git a/src/leveled_tictac.erl b/src/leveled_tictac.erl index e84c46a3..bb0e2b3d 100644 --- a/src/leveled_tictac.erl +++ b/src/leveled_tictac.erl @@ -52,8 +52,6 @@ -module(leveled_tictac). --include("include/leveled.hrl"). - -export([ new_tree/1, new_tree/2, @@ -102,10 +100,12 @@ size :: tree_size(), width :: integer(), segment_count :: integer(), - level1 :: binary(), + level1 :: level1_map(), level2 :: any() % an array - but OTP compatibility }). +-type level1_map() :: #{non_neg_integer() => binary()}. + -type tictactree() :: #tictactree{}. 
-type segment48() :: @@ -114,6 +114,11 @@ {binary(), integer(), integer(), integer(), binary()}. -type tree_size() :: xxsmall|xsmall|small|medium|large|xlarge. +-type bin_extract_fun() + :: + fun((term(), term()) -> + {binary(), binary()|{is_hash, non_neg_integer()}} + ). -export_type([tictactree/0, segment48/0, tree_size/0]). @@ -137,7 +142,7 @@ new_tree(TreeID) -> new_tree(TreeID, Size) -> Width = get_size(Size), Lv1Width = Width * ?HASH_SIZE * 8, - Lv1Init = <<0:Lv1Width/integer>>, + Lv1Init = to_level1_map(<<0:Lv1Width/integer>>), Lv2Init = array:new([{size, Width}, {default, ?EMPTY}]), #tictactree{treeID = TreeID, size = Size, @@ -159,9 +164,16 @@ export_tree(Tree) -> L2 = lists:foldl(EncodeL2Fun, [], lists:seq(0, Tree#tictactree.width - 1)), {struct, - [{<<"level1">>, base64:encode_to_string(Tree#tictactree.level1)}, - {<<"level2">>, {struct, lists:reverse(L2)}} - ]}. + [{<<"level1">>, + base64:encode_to_string( + from_level1_map(Tree#tictactree.level1) + ) + }, + {<<"level2">>, + {struct, lists:reverse(L2)} + } + ] + }. -spec import_tree({struct, list()}) -> tictactree(). %% @doc @@ -183,15 +195,16 @@ import_tree(ExportedTree) -> array:set(binary_to_integer(X), L2SegBin, L2Array) end, Lv2 = lists:foldl(FoldFun, Lv2Init, L2List), + garbage_collect(), #tictactree{treeID = import, size = Size, width = Width, segment_count = Width * ?L2_CHUNKSIZE, - level1 = L1Bin, + level1 = to_level1_map(L1Bin), level2 = Lv2}. --spec add_kv(tictactree(), term(), term(), fun()) -> tictactree(). +-spec add_kv(tictactree(), term(), term(), bin_extract_fun()) -> tictactree(). %% @doc %% Add a Key and value to a tictactree using the BinExtractFun to extract a %% binary from the Key and value from which to generate the hash. The @@ -200,8 +213,9 @@ import_tree(ExportedTree) -> add_kv(TicTacTree, Key, Value, BinExtractFun) -> add_kv(TicTacTree, Key, Value, BinExtractFun, false). --spec add_kv(tictactree(), term(), term(), fun(), boolean()) - -> tictactree()|{tictactree(), integer()}. 
+-spec add_kv( + tictactree(), term(), term(), bin_extract_fun(), boolean()) + -> tictactree()|{tictactree(), integer()}. %% @doc %% add_kv with ability to return segment ID of Key added add_kv(TicTacTree, Key, Value, BinExtractFun, ReturnSegment) -> @@ -268,7 +282,7 @@ find_dirtysegments(SrcBin, SinkBin) -> %% @doc %% Return the level1 binary for a tree. fetch_root(TicTacTree) -> - TicTacTree#tictactree.level1. + from_level1_map(TicTacTree#tictactree.level1). -spec fetch_leaves(tictactree(), list(integer())) -> list(). %% @doc @@ -303,11 +317,17 @@ merge_trees(TreeA, TreeB) -> NewLevel2 = merge_binaries(L2A, L2B), array:set(SQN, NewLevel2, MergeL2) end, - NewLevel2 = lists:foldl(MergeFun, - MergedTree#tictactree.level2, - lists:seq(0, MergedTree#tictactree.width - 1)), + NewLevel2 = + lists:foldl( + MergeFun, + MergedTree#tictactree.level2, + lists:seq(0, MergedTree#tictactree.width - 1) + ), - MergedTree#tictactree{level1 = NewLevel1, level2 = NewLevel2}. + MergedTree#tictactree{ + level1 = to_level1_map(NewLevel1), + level2 = NewLevel2 + }. -spec get_segment(integer(), integer()|xxsmall|xsmall|small|medium|large|xlarge) -> @@ -462,6 +482,26 @@ join_segment(BranchID, LeafID) -> %%% Internal functions %%%============================================================================ +-spec to_level1_map(binary()) -> level1_map(). +to_level1_map(L1Bin) -> + to_level1_map_loop(L1Bin, maps:new(), 0). + +to_level1_map_loop(<<>>, L1MapAcc, _Idx) -> + L1MapAcc; +to_level1_map_loop(<>, L1MapAcc, Idx) -> + to_level1_map_loop(Rest, maps:put(Idx, Slice, L1MapAcc), Idx + 1). + + +-spec from_level1_map(level1_map()) -> binary(). +from_level1_map(L1Map) -> + lists:foldl( + fun(I, Acc) -> + <> + end, + <<>>, + lists:seq(0, maps:size(L1Map) - 1) + ). + -spec extract_segment(integer(), tictactree()) -> {integer(), integer(), tree_extract(), tree_extract()}. 
%% @doc @@ -472,9 +512,10 @@ extract_segment(Segment, TicTacTree) -> Level1Pos = (Segment bsr ?L2_BITSIZE) band (TicTacTree#tictactree.width - 1), + Level1Slice = Level1Pos div 16, Level2BytePos = ?HASH_SIZE * Level2Pos, - Level1BytePos = ?HASH_SIZE * Level1Pos, + Level1BytePos = ?HASH_SIZE * (Level1Pos rem 16), Level2 = get_level2(TicTacTree, Level1Pos), @@ -484,7 +525,7 @@ extract_segment(Segment, TicTacTree) -> PostL2/binary>> = Level2, <> = TicTacTree#tictactree.level1, + PostL1/binary>> = maps:get(Level1Slice, TicTacTree#tictactree.level1), {SegLeaf1, SegLeaf2, @@ -501,16 +542,17 @@ replace_segment(L1Hash, L2Hash, L1Extract, L2Extract, TicTacTree) -> {PreL1, Level1BytePos, Level1Pos, HashIntLength, PostL1} = L1Extract, {PreL2, Level2BytePos, _Level2Pos, HashIntLength, PostL2} = L2Extract, + Level1Slice = Level1Pos div 16, + Level1Upd = <>, Level2Upd = <>, - TicTacTree#tictactree{level1 = Level1Upd, - level2 = array:set(Level1Pos, - Level2Upd, - TicTacTree#tictactree.level2)}. + TicTacTree#tictactree{ + level1 = maps:put(Level1Slice, Level1Upd, TicTacTree#tictactree.level1), + level2 = array:set(Level1Pos, Level2Upd, TicTacTree#tictactree.level2)}. get_level2(TicTacTree, L1Pos) -> case array:get(L1Pos, TicTacTree#tictactree.level2) of @@ -576,7 +618,7 @@ merge_binaries(BinA, BinB) -> -include_lib("eunit/include/eunit.hrl"). checktree(TicTacTree) -> - checktree(TicTacTree#tictactree.level1, TicTacTree, 0). + checktree(from_level1_map(TicTacTree#tictactree.level1), TicTacTree, 0). checktree(<<>>, TicTacTree, Counter) -> true = TicTacTree#tictactree.width == Counter; @@ -870,6 +912,75 @@ find_dirtysegments_withanemptytree_test() -> ?assertMatch(ExpectedAnswer, find_dirtysegments(fetch_root(T3), <<>>)). +tictac_perf_test_() -> + {timeout, 120, fun tictac_perf_tester_multi/0}. + +tictac_perf_tester_multi() -> + tictac_perf_tester(1000000, large), + tictac_perf_tester(40000, small). 
+ +tictac_perf_tester(KeyCount, TreeSize) -> + io:format(user, "Testing with Tree Size ~w~n", [TreeSize]), + io:format(user, "Generating ~w Keys and Hashes~n", [KeyCount]), + SW0 = os:system_time(millisecond), + KVL = + lists:map( + fun(I) -> + {{o, to_bucket(I rem 8), to_key(I), null}, + {is_hash, erlang:phash2(integer_to_binary(I))}} + end, + lists:seq(1, KeyCount) + ), + + SW1 = os:system_time(millisecond), + io:format(user, "Generating Keys took ~w milliseconds~n", [SW1 - SW0]), + + Tree = new_tree(test, TreeSize), + log_memory_footprint(), + + SW2 = os:system_time(millisecond), + io:format(user, "Generating new tree took ~w milliseconds~n", [SW2 - SW1]), + + UpdTree = + lists:foldl( + fun({K, V}, Acc) -> + add_kv(Acc, K, V, fun(K0, V0) -> {element(3, K0), V0} end) + end, + Tree, + KVL + ), + + SW3 = os:system_time(millisecond), + io:format(user, "Loading tree took ~w milliseconds~n", [SW3 - SW2]), + log_memory_footprint(), + + ExportedTree = export_tree(UpdTree), + + SW4 = os:system_time(millisecond), + io:format(user, "Exporting tree took ~w milliseconds~n", [SW4 - SW3]), + + ImportedTree = import_tree(ExportedTree), + + SW5 = os:system_time(millisecond), + io:format(user, "Importing tree took ~w milliseconds~n", [SW5 - SW4]), + + log_memory_footprint(), + + io:format(user, "Garbage collect~n", []), + garbage_collect(), + + log_memory_footprint(), + + ?assertMatch([], find_dirtyleaves(UpdTree, ImportedTree)). + +to_key(N) -> + list_to_binary(io_lib:format("K~8..0B", [N])). + +to_bucket(N) -> + list_to_binary(io_lib:format("B~8..0B", [N])). + +log_memory_footprint() -> + io:format(user, "Memory footprint ~0p~n", [erlang:memory()]). -endif. 
From 6b3d00351effc6b093c2d353147ff8ec515d7290 Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Tue, 9 Jul 2024 21:36:58 +0100 Subject: [PATCH 2/6] Tidy-up --- src/leveled_tictac.erl | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/src/leveled_tictac.erl b/src/leveled_tictac.erl index bb0e2b3d..fccc4e39 100644 --- a/src/leveled_tictac.erl +++ b/src/leveled_tictac.erl @@ -196,12 +196,14 @@ import_tree(ExportedTree) -> end, Lv2 = lists:foldl(FoldFun, Lv2Init, L2List), garbage_collect(), - #tictactree{treeID = import, - size = Size, - width = Width, - segment_count = Width * ?L2_CHUNKSIZE, - level1 = to_level1_map(L1Bin), - level2 = Lv2}. + #tictactree{ + treeID = import, + size = Size, + width = Width, + segment_count = Width * ?L2_CHUNKSIZE, + level1 = to_level1_map(L1Bin), + level2 = Lv2 + }. -spec add_kv(tictactree(), term(), term(), bin_extract_fun()) -> tictactree(). @@ -229,14 +231,15 @@ add_kv(TicTacTree, Key, Value, BinExtractFun, ReturnSegment) -> SegLeaf2Upd = SegLeaf2 bxor SegChangeHash, SegLeaf1Upd = SegLeaf1 bxor SegChangeHash, + UpdatedTree = + replace_segment( + SegLeaf1Upd, SegLeaf2Upd, L1Extract, L2Extract, TicTacTree + ), case ReturnSegment of true -> - {replace_segment(SegLeaf1Upd, SegLeaf2Upd, - L1Extract, L2Extract, TicTacTree), - Segment}; + {UpdatedTree, Segment}; false -> - replace_segment(SegLeaf1Upd, SegLeaf2Upd, - L1Extract, L2Extract, TicTacTree) + UpdatedTree end. -spec alter_segment(integer(), integer(), tictactree()) -> tictactree(). 
From 16f542258bbf1b384279be9619ba05ac424d76e3 Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Tue, 9 Jul 2024 22:12:01 +0100 Subject: [PATCH 3/6] Add type --- src/leveled_tictac.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/leveled_tictac.erl b/src/leveled_tictac.erl index fccc4e39..9e12e180 100644 --- a/src/leveled_tictac.erl +++ b/src/leveled_tictac.erl @@ -101,7 +101,7 @@ width :: integer(), segment_count :: integer(), level1 :: level1_map(), - level2 :: any() % an array - but OTP compatibility + level2 :: array:array() }). -type level1_map() :: #{non_neg_integer() => binary()}. From 45c05d4e258ebc611c648cd4a2edebd56e17860a Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Wed, 10 Jul 2024 08:12:34 +0100 Subject: [PATCH 4/6] Remove ++ requiring copy of Acc Rely on mechanism producing a sorted result, not sorting --- src/leveled_tictac.erl | 111 ++++++++++++++++++++++++++++++----------- 1 file changed, 81 insertions(+), 30 deletions(-) diff --git a/src/leveled_tictac.erl b/src/leveled_tictac.erl index 9e12e180..47121dc3 100644 --- a/src/leveled_tictac.erl +++ b/src/leveled_tictac.erl @@ -186,8 +186,8 @@ import_tree(ExportedTree) -> Sizes = lists:map(fun(SizeTag) -> {SizeTag, get_size(SizeTag)} end, ?VALID_SIZES), Width = byte_size(L1Bin) div ?HASH_SIZE, - {Size, Width} = lists:keyfind(Width, 2, Sizes), - Width = get_size(Size), + {Size, _Width} = lists:keyfind(Width, 2, Sizes), + true = get_size(Size) == Width, Lv2Init = array:new([{size, Width}]), FoldFun = fun({X, EncodedL2SegBin}, L2Array) -> @@ -258,8 +258,9 @@ alter_segment(Segment, Hash, Tree) -> %% Returns a list of segment IDs which hold differences between the state %% represented by the two trees. 
find_dirtyleaves(SrcTree, SnkTree) -> - Size = SrcTree#tictactree.size, - Size = SnkTree#tictactree.size, + SizeSrc = SrcTree#tictactree.size, + SizeSnk = SnkTree#tictactree.size, + true = SizeSrc == SizeSnk, IdxList = find_dirtysegments(fetch_root(SrcTree), fetch_root(SnkTree)), SrcLeaves = fetch_leaves(SrcTree, IdxList), @@ -267,12 +268,18 @@ find_dirtyleaves(SrcTree, SnkTree) -> FoldFun = fun(Idx, Acc) -> - {Idx, SrcLeaf} = lists:keyfind(Idx, 1, SrcLeaves), - {Idx, SnkLeaf} = lists:keyfind(Idx, 1, SnkLeaves), + SrcLeaf = element(2, lists:keyfind(Idx, 1, SrcLeaves)), + SnkLeaf = element(2, lists:keyfind(Idx, 1, SnkLeaves)), L2IdxList = segmentcompare(SrcLeaf, SnkLeaf), - Acc ++ lists:map(fun(X) -> X + Idx * ?L2_CHUNKSIZE end, L2IdxList) + lists:foldl( + fun(X, InnerAcc) -> + SegID = X + Idx * ?L2_CHUNKSIZE, + [SegID|InnerAcc] + end, + Acc, + L2IdxList) end, - lists:sort(lists:foldl(FoldFun, [], IdxList)). + lists:foldl(FoldFun, [], IdxList). -spec find_dirtysegments(binary(), binary()) -> list(integer()). %% @doc @@ -332,9 +339,9 @@ merge_trees(TreeA, TreeB) -> level2 = NewLevel2 }. --spec get_segment(integer(), - integer()|xxsmall|xsmall|small|medium|large|xlarge) -> - integer(). +-spec get_segment( + integer(), + integer()|xxsmall|xsmall|small|medium|large|xlarge) -> integer(). %% @doc %% Return the segment ID for a Key. Can pass the tree size or the actual %% segment count derived from the size @@ -362,8 +369,8 @@ tictac_hash(BinKey, Val) when is_binary(BinKey) -> end, {HashKeyToSeg, AltHashKey bxor HashVal}. --spec keyto_doublesegment32(binary()) - -> {non_neg_integer(), non_neg_integer()}. +-spec keyto_doublesegment32( + binary()) -> {non_neg_integer(), non_neg_integer()}. 
%% @doc %% Used in tictac_hash/2 to provide an alternative hash of the key to bxor with %% the value, as well as the segment hash to locate the leaf of the tree to be @@ -395,8 +402,8 @@ keyto_segment48(BinKey) -> _Rest/binary>> = crypto:hash(md5, BinKey), {segment_hash, SegmentID, ExtraHash, AltHash}. --spec generate_segmentfilter_list(list(integer()), tree_size()) - -> false|list(integer()). +-spec generate_segmentfilter_list( + list(integer()), tree_size()) -> false|list(integer()). %% @doc %% Cannot accelerate segment listing for trees below certain sizes, so check %% the creation of segment filter lists with this function @@ -421,8 +428,8 @@ generate_segmentfilter_list(SegmentList, Size) -> SegmentList end. --spec adjust_segmentmatch_list(list(integer()), tree_size(), tree_size()) - -> list(integer()). +-spec adjust_segmentmatch_list( + list(integer()), tree_size(), tree_size()) -> list(integer()). %% @doc %% If we have dirty segments discovered by comparing trees of size CompareSize, %% and we want to see if it matches a segment for a key which was created for a @@ -464,8 +471,8 @@ adjust_segmentmatch_list(SegmentList, CompareSize, StoreSize) -> end. --spec match_segment({integer(), tree_size()}, {integer(), tree_size()}) - -> boolean(). +-spec match_segment( + {integer(), tree_size()}, {integer(), tree_size()}) -> boolean(). %% @doc %% Does segment A match segment B - given that segment A was generated using %% Tree size A and segment B was generated using Tree Size B @@ -505,8 +512,9 @@ from_level1_map(L1Map) -> lists:seq(0, maps:size(L1Map) - 1) ). --spec extract_segment(integer(), tictactree()) -> - {integer(), integer(), tree_extract(), tree_extract()}. +-spec extract_segment( + integer(), tictactree()) -> + {integer(), integer(), tree_extract(), tree_extract()}. 
%% @doc %% Extract the Level 1 and Level 2 slices from a tree to prepare an update extract_segment(Segment, TicTacTree) -> @@ -536,9 +544,9 @@ extract_segment(Segment, TicTacTree) -> {PreL2, Level2BytePos, Level2Pos, HashIntLength, PostL2}}. --spec replace_segment(integer(), integer(), - tree_extract(), tree_extract(), - tictactree()) -> tictactree(). +-spec replace_segment( + integer(), integer(), tree_extract(), tree_extract(), tictactree()) -> + tictactree(). %% @doc %% Replace a slice of a tree replace_segment(L1Hash, L2Hash, L1Extract, L2Extract, TicTacTree) -> @@ -598,7 +606,7 @@ segmentcompare(SrcBin, SnkBin, Acc, Counter) -> <> = SrcBin, <> = SnkBin, case SrcHash of - SnkHash -> + H when H == SnkHash -> segmentcompare(SrcTail, SnkTail, Acc, Counter + 1); _ -> segmentcompare(SrcTail, SnkTail, [Counter|Acc], Counter + 1) @@ -701,6 +709,7 @@ simple_test_withsize(Size) -> DL0 = find_dirtyleaves(Tree1, Tree0), ?assertMatch(true, lists:member(GetSegFun(K1), DL0)), DL1 = find_dirtyleaves(Tree3, Tree1), + ?assertMatch(DL1, lists:sort(DL1)), ?assertMatch(true, lists:member(GetSegFun(K2), DL1)), ?assertMatch(true, lists:member(GetSegFun(K3), DL1)), ?assertMatch(false, lists:member(GetSegFun(K1), DL1)), @@ -710,6 +719,53 @@ simple_test_withsize(Size) -> ImpTree3 = import_tree(ExpTree3), ?assertMatch(DL1, find_dirtyleaves(ImpTree3, Tree1)). 
+dirtyleaves_sorted_test() -> + Tree0 = new_tree(test, large), + KVL1 = + lists:map( + fun(I) -> + {{o, to_bucket(I rem 8), to_key(I), null}, + {is_hash, erlang:phash2(integer_to_binary(I))}} + end, + lists:seq(1, 50000) + ), + KVL2 = + lists:map( + fun(I) -> + {{o, to_bucket(I rem 8), to_key(I), null}, + {is_hash, erlang:phash2(integer_to_binary(I))}} + end, + lists:seq(100000, 150000) + ), + Tree1 = + lists:foldl( + fun({K, V}, Acc) -> + add_kv(Acc, K, V, fun(K0, V0) -> {element(3, K0), V0} end) + end, + Tree0, + KVL1 + ), + Tree2 = + lists:foldl( + fun({K, V}, Acc) -> + add_kv(Acc, K, V, fun(K0, V0) -> {element(3, K0), V0} end) + end, + Tree0, + KVL2 + ), + SW0 = os:system_time(millisecond), + DL1 = find_dirtyleaves(Tree1, Tree2), + DL2 = find_dirtyleaves(Tree2, Tree1), + io:format( + user, + "Finding approx 100K dirty leaves twice in ~w milliseconds~n", + [os:system_time(millisecond) - SW0] + ), + ?assertMatch(DL1, lists:sort(DL1)), + ?assertMatch(DL2, lists:sort(DL2)), + ?assertMatch(DL1, DL2). + + merge_bysize_small_test() -> merge_test_withsize(small). @@ -969,11 +1025,6 @@ tictac_perf_tester(KeyCount, TreeSize) -> log_memory_footprint(), - io:format(user, "Garbage collect~n", []), - garbage_collect(), - - log_memory_footprint(), - ?assertMatch([], find_dirtyleaves(UpdTree, ImportedTree)). 
to_key(N) -> From 9a03e0f1fa392b8bedcb193afa0e36899298e008 Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Tue, 3 Sep 2024 16:04:43 +0100 Subject: [PATCH 5/6] Update src/leveled_tictac.erl Co-authored-by: Thomas Arts --- src/leveled_tictac.erl | 1 + 1 file changed, 1 insertion(+) diff --git a/src/leveled_tictac.erl b/src/leveled_tictac.erl index 47121dc3..4c750449 100644 --- a/src/leveled_tictac.erl +++ b/src/leveled_tictac.erl @@ -187,6 +187,7 @@ import_tree(ExportedTree) -> ?VALID_SIZES), Width = byte_size(L1Bin) div ?HASH_SIZE, {Size, _Width} = lists:keyfind(Width, 2, Sizes), + %% assert that size is indeed the provided width true = get_size(Size) == Width, Lv2Init = array:new([{size, Width}]), FoldFun = From 18f7516a85a2650615fe9ae69e5440c6146d1759 Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Tue, 3 Sep 2024 16:18:51 +0100 Subject: [PATCH 6/6] Update following review --- src/leveled_tictac.erl | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/leveled_tictac.erl b/src/leveled_tictac.erl index 47121dc3..5195851e 100644 --- a/src/leveled_tictac.erl +++ b/src/leveled_tictac.erl @@ -268,8 +268,8 @@ find_dirtyleaves(SrcTree, SnkTree) -> FoldFun = fun(Idx, Acc) -> - SrcLeaf = element(2, lists:keyfind(Idx, 1, SrcLeaves)), - SnkLeaf = element(2, lists:keyfind(Idx, 1, SnkLeaves)), + {_, SrcLeaf} = lists:keyfind(Idx, 1, SrcLeaves), + {_, SnkLeaf} = lists:keyfind(Idx, 1, SnkLeaves), L2IdxList = segmentcompare(SrcLeaf, SnkLeaf), lists:foldl( fun(X, InnerAcc) -> @@ -279,6 +279,7 @@ find_dirtyleaves(SrcTree, SnkTree) -> Acc, L2IdxList) end, + %% Output not sorted, as sorted by the design of the construction process lists:foldl(FoldFun, [], IdxList). -spec find_dirtysegments(binary(), binary()) -> list(integer()).