diff --git a/src/leveled_penciller.erl b/src/leveled_penciller.erl index eeb358fa..4c9e7cbb 100644 --- a/src/leveled_penciller.erl +++ b/src/leveled_penciller.erl @@ -159,7 +159,7 @@ -behaviour(gen_server). --include("include/leveled.hrl"). +-include("leveled.hrl"). -export([ init/1, @@ -207,23 +207,17 @@ -export([clean_testdir/1]). -endif. --define(MAX_WORK_WAIT, 300). -define(MANIFEST_FP, "ledger_manifest"). -define(FILES_FP, "ledger_files"). --define(CURRENT_FILEX, "crr"). --define(PENDING_FILEX, "pnd"). -define(SST_FILEX, ".sst"). -define(ARCHIVE_FILEX, ".bak"). -define(SUPER_MAX_TABLE_SIZE, 40000). --define(PROMPT_WAIT_ONL0, 5). -define(WORKQUEUE_BACKLOG_TOLERANCE, 4). -define(COIN_SIDECOUNT, 4). -define(SLOW_FETCH, 500000). % Log a very slow fetch - longer than 500ms -define(FOLD_SCANWIDTH, 32). -define(ITERATOR_SCANWIDTH, 4). -define(ITERATOR_MINSCANWIDTH, 1). --define(TIMING_SAMPLECOUNTDOWN, 10000). --define(TIMING_SAMPLESIZE, 100). -define(SHUTDOWN_LOOPS, 10). -define(SHUTDOWN_PAUSE, 10000). % How long to wait for snapshots to be released on shutdown diff --git a/src/leveled_tictac.erl b/src/leveled_tictac.erl index e84c46a3..d4b07985 100644 --- a/src/leveled_tictac.erl +++ b/src/leveled_tictac.erl @@ -52,8 +52,6 @@ -module(leveled_tictac). --include("include/leveled.hrl"). - -export([ new_tree/1, new_tree/2, @@ -102,10 +100,12 @@ size :: tree_size(), width :: integer(), segment_count :: integer(), - level1 :: binary(), - level2 :: any() % an array - but OTP compatibility + level1 :: level1_map(), + level2 :: array:array() }). +-type level1_map() :: #{non_neg_integer() => binary()}. + -type tictactree() :: #tictactree{}. -type segment48() :: @@ -114,6 +114,11 @@ {binary(), integer(), integer(), integer(), binary()}. -type tree_size() :: xxsmall|xsmall|small|medium|large|xlarge. +-type bin_extract_fun() + :: + fun((term(), term()) -> + {binary(), binary()|{is_hash, non_neg_integer()}} + ). -export_type([tictactree/0, segment48/0, tree_size/0]). 
@@ -137,7 +142,7 @@ new_tree(TreeID) -> new_tree(TreeID, Size) -> Width = get_size(Size), Lv1Width = Width * ?HASH_SIZE * 8, - Lv1Init = <<0:Lv1Width/integer>>, + Lv1Init = to_level1_map(<<0:Lv1Width/integer>>), Lv2Init = array:new([{size, Width}, {default, ?EMPTY}]), #tictactree{treeID = TreeID, size = Size, @@ -159,9 +164,16 @@ export_tree(Tree) -> L2 = lists:foldl(EncodeL2Fun, [], lists:seq(0, Tree#tictactree.width - 1)), {struct, - [{<<"level1">>, base64:encode_to_string(Tree#tictactree.level1)}, - {<<"level2">>, {struct, lists:reverse(L2)}} - ]}. + [{<<"level1">>, + base64:encode_to_string( + from_level1_map(Tree#tictactree.level1) + ) + }, + {<<"level2">>, + {struct, lists:reverse(L2)} + } + ] + }. -spec import_tree({struct, list()}) -> tictactree(). %% @doc @@ -174,8 +186,9 @@ import_tree(ExportedTree) -> Sizes = lists:map(fun(SizeTag) -> {SizeTag, get_size(SizeTag)} end, ?VALID_SIZES), Width = byte_size(L1Bin) div ?HASH_SIZE, - {Size, Width} = lists:keyfind(Width, 2, Sizes), - Width = get_size(Size), + {Size, _Width} = lists:keyfind(Width, 2, Sizes), + %% assert that size is indeed the provided width + true = get_size(Size) == Width, Lv2Init = array:new([{size, Width}]), FoldFun = fun({X, EncodedL2SegBin}, L2Array) -> @@ -183,15 +196,18 @@ import_tree(ExportedTree) -> array:set(binary_to_integer(X), L2SegBin, L2Array) end, Lv2 = lists:foldl(FoldFun, Lv2Init, L2List), - #tictactree{treeID = import, - size = Size, - width = Width, - segment_count = Width * ?L2_CHUNKSIZE, - level1 = L1Bin, - level2 = Lv2}. - - --spec add_kv(tictactree(), term(), term(), fun()) -> tictactree(). + garbage_collect(), + #tictactree{ + treeID = import, + size = Size, + width = Width, + segment_count = Width * ?L2_CHUNKSIZE, + level1 = to_level1_map(L1Bin), + level2 = Lv2 + }. + + +-spec add_kv(tictactree(), term(), term(), bin_extract_fun()) -> tictactree(). 
%% @doc %% Add a Key and value to a tictactree using the BinExtractFun to extract a %% binary from the Key and value from which to generate the hash. The @@ -200,8 +216,9 @@ import_tree(ExportedTree) -> add_kv(TicTacTree, Key, Value, BinExtractFun) -> add_kv(TicTacTree, Key, Value, BinExtractFun, false). --spec add_kv(tictactree(), term(), term(), fun(), boolean()) - -> tictactree()|{tictactree(), integer()}. +-spec add_kv( + tictactree(), term(), term(), bin_extract_fun(), boolean()) + -> tictactree()|{tictactree(), integer()}. %% @doc %% add_kv with ability to return segment ID of Key added add_kv(TicTacTree, Key, Value, BinExtractFun, ReturnSegment) -> @@ -215,14 +232,15 @@ add_kv(TicTacTree, Key, Value, BinExtractFun, ReturnSegment) -> SegLeaf2Upd = SegLeaf2 bxor SegChangeHash, SegLeaf1Upd = SegLeaf1 bxor SegChangeHash, + UpdatedTree = + replace_segment( + SegLeaf1Upd, SegLeaf2Upd, L1Extract, L2Extract, TicTacTree + ), case ReturnSegment of true -> - {replace_segment(SegLeaf1Upd, SegLeaf2Upd, - L1Extract, L2Extract, TicTacTree), - Segment}; + {UpdatedTree, Segment}; false -> - replace_segment(SegLeaf1Upd, SegLeaf2Upd, - L1Extract, L2Extract, TicTacTree) + UpdatedTree end. -spec alter_segment(integer(), integer(), tictactree()) -> tictactree(). @@ -241,8 +259,9 @@ alter_segment(Segment, Hash, Tree) -> %% Returns a list of segment IDs which hold differences between the state %% represented by the two trees. 
find_dirtyleaves(SrcTree, SnkTree) -> - Size = SrcTree#tictactree.size, - Size = SnkTree#tictactree.size, + SizeSrc = SrcTree#tictactree.size, + SizeSnk = SnkTree#tictactree.size, + true = SizeSrc == SizeSnk, IdxList = find_dirtysegments(fetch_root(SrcTree), fetch_root(SnkTree)), SrcLeaves = fetch_leaves(SrcTree, IdxList), @@ -250,12 +269,19 @@ find_dirtyleaves(SrcTree, SnkTree) -> FoldFun = fun(Idx, Acc) -> - {Idx, SrcLeaf} = lists:keyfind(Idx, 1, SrcLeaves), - {Idx, SnkLeaf} = lists:keyfind(Idx, 1, SnkLeaves), + {_, SrcLeaf} = lists:keyfind(Idx, 1, SrcLeaves), + {_, SnkLeaf} = lists:keyfind(Idx, 1, SnkLeaves), L2IdxList = segmentcompare(SrcLeaf, SnkLeaf), - Acc ++ lists:map(fun(X) -> X + Idx * ?L2_CHUNKSIZE end, L2IdxList) + lists:foldl( + fun(X, InnerAcc) -> + SegID = X + Idx * ?L2_CHUNKSIZE, + [SegID|InnerAcc] + end, + Acc, + L2IdxList) end, - lists:sort(lists:foldl(FoldFun, [], IdxList)). + %% Output not sorted, as sorted by the design of the construction process + lists:foldl(FoldFun, [], IdxList). -spec find_dirtysegments(binary(), binary()) -> list(integer()). %% @doc @@ -268,7 +294,7 @@ find_dirtysegments(SrcBin, SinkBin) -> %% @doc %% Return the level1 binary for a tree. fetch_root(TicTacTree) -> - TicTacTree#tictactree.level1. + from_level1_map(TicTacTree#tictactree.level1). -spec fetch_leaves(tictactree(), list(integer())) -> list(). %% @doc @@ -303,15 +329,21 @@ merge_trees(TreeA, TreeB) -> NewLevel2 = merge_binaries(L2A, L2B), array:set(SQN, NewLevel2, MergeL2) end, - NewLevel2 = lists:foldl(MergeFun, - MergedTree#tictactree.level2, - lists:seq(0, MergedTree#tictactree.width - 1)), + NewLevel2 = + lists:foldl( + MergeFun, + MergedTree#tictactree.level2, + lists:seq(0, MergedTree#tictactree.width - 1) + ), - MergedTree#tictactree{level1 = NewLevel1, level2 = NewLevel2}. - --spec get_segment(integer(), - integer()|xxsmall|xsmall|small|medium|large|xlarge) -> - integer(). 
+ MergedTree#tictactree{ + level1 = to_level1_map(NewLevel1), + level2 = NewLevel2 + }. + +-spec get_segment( + integer(), + integer()|xxsmall|xsmall|small|medium|large|xlarge) -> integer(). %% @doc %% Return the segment ID for a Key. Can pass the tree size or the actual %% segment count derived from the size @@ -339,8 +371,8 @@ tictac_hash(BinKey, Val) when is_binary(BinKey) -> end, {HashKeyToSeg, AltHashKey bxor HashVal}. --spec keyto_doublesegment32(binary()) - -> {non_neg_integer(), non_neg_integer()}. +-spec keyto_doublesegment32( + binary()) -> {non_neg_integer(), non_neg_integer()}. %% @doc %% Used in tictac_hash/2 to provide an alternative hash of the key to bxor with %% the value, as well as the segment hash to locate the leaf of the tree to be @@ -372,8 +404,8 @@ keyto_segment48(BinKey) -> _Rest/binary>> = crypto:hash(md5, BinKey), {segment_hash, SegmentID, ExtraHash, AltHash}. --spec generate_segmentfilter_list(list(integer()), tree_size()) - -> false|list(integer()). +-spec generate_segmentfilter_list( + list(integer()), tree_size()) -> false|list(integer()). %% @doc %% Cannot accelerate segment listing for trees below certain sizes, so check %% the creation of segment filter lists with this function @@ -398,8 +430,8 @@ generate_segmentfilter_list(SegmentList, Size) -> SegmentList end. --spec adjust_segmentmatch_list(list(integer()), tree_size(), tree_size()) - -> list(integer()). +-spec adjust_segmentmatch_list( + list(integer()), tree_size(), tree_size()) -> list(integer()). %% @doc %% If we have dirty segments discovered by comparing trees of size CompareSize, %% and we want to see if it matches a segment for a key which was created for a @@ -441,8 +473,8 @@ adjust_segmentmatch_list(SegmentList, CompareSize, StoreSize) -> end. --spec match_segment({integer(), tree_size()}, {integer(), tree_size()}) - -> boolean(). +-spec match_segment( + {integer(), tree_size()}, {integer(), tree_size()}) -> boolean(). 
%% @doc %% Does segment A match segment B - given that segment A was generated using %% Tree size A and segment B was generated using Tree Size B @@ -462,8 +494,29 @@ join_segment(BranchID, LeafID) -> %%% Internal functions %%%============================================================================ --spec extract_segment(integer(), tictactree()) -> - {integer(), integer(), tree_extract(), tree_extract()}. +-spec to_level1_map(binary()) -> level1_map(). +to_level1_map(L1Bin) -> + to_level1_map_loop(L1Bin, maps:new(), 0). + +to_level1_map_loop(<<>>, L1MapAcc, _Idx) -> + L1MapAcc; +to_level1_map_loop(<>, L1MapAcc, Idx) -> + to_level1_map_loop(Rest, maps:put(Idx, Slice, L1MapAcc), Idx + 1). + + +-spec from_level1_map(level1_map()) -> binary(). +from_level1_map(L1Map) -> + lists:foldl( + fun(I, Acc) -> + <> + end, + <<>>, + lists:seq(0, maps:size(L1Map) - 1) + ). + +-spec extract_segment( + integer(), tictactree()) -> + {integer(), integer(), tree_extract(), tree_extract()}. %% @doc %% Extract the Level 1 and Level 2 slices from a tree to prepare an update extract_segment(Segment, TicTacTree) -> @@ -472,9 +525,10 @@ extract_segment(Segment, TicTacTree) -> Level1Pos = (Segment bsr ?L2_BITSIZE) band (TicTacTree#tictactree.width - 1), + Level1Slice = Level1Pos div 16, Level2BytePos = ?HASH_SIZE * Level2Pos, - Level1BytePos = ?HASH_SIZE * Level1Pos, + Level1BytePos = ?HASH_SIZE * (Level1Pos rem 16), Level2 = get_level2(TicTacTree, Level1Pos), @@ -484,7 +538,7 @@ extract_segment(Segment, TicTacTree) -> PostL2/binary>> = Level2, <> = TicTacTree#tictactree.level1, + PostL1/binary>> = maps:get(Level1Slice, TicTacTree#tictactree.level1), {SegLeaf1, SegLeaf2, @@ -492,25 +546,26 @@ extract_segment(Segment, TicTacTree) -> {PreL2, Level2BytePos, Level2Pos, HashIntLength, PostL2}}. --spec replace_segment(integer(), integer(), - tree_extract(), tree_extract(), - tictactree()) -> tictactree(). 
+-spec replace_segment( + integer(), integer(), tree_extract(), tree_extract(), tictactree()) -> + tictactree(). %% @doc %% Replace a slice of a tree replace_segment(L1Hash, L2Hash, L1Extract, L2Extract, TicTacTree) -> {PreL1, Level1BytePos, Level1Pos, HashIntLength, PostL1} = L1Extract, {PreL2, Level2BytePos, _Level2Pos, HashIntLength, PostL2} = L2Extract, + Level1Slice = Level1Pos div 16, + Level1Upd = <>, Level2Upd = <>, - TicTacTree#tictactree{level1 = Level1Upd, - level2 = array:set(Level1Pos, - Level2Upd, - TicTacTree#tictactree.level2)}. + TicTacTree#tictactree{ + level1 = maps:put(Level1Slice, Level1Upd, TicTacTree#tictactree.level1), + level2 = array:set(Level1Pos, Level2Upd, TicTacTree#tictactree.level2)}. get_level2(TicTacTree, L1Pos) -> case array:get(L1Pos, TicTacTree#tictactree.level2) of @@ -553,7 +608,7 @@ segmentcompare(SrcBin, SnkBin, Acc, Counter) -> <> = SrcBin, <> = SnkBin, case SrcHash of - SnkHash -> + H when H == SnkHash -> segmentcompare(SrcTail, SnkTail, Acc, Counter + 1); _ -> segmentcompare(SrcTail, SnkTail, [Counter|Acc], Counter + 1) @@ -576,7 +631,7 @@ merge_binaries(BinA, BinB) -> -include_lib("eunit/include/eunit.hrl"). checktree(TicTacTree) -> - checktree(TicTacTree#tictactree.level1, TicTacTree, 0). + checktree(from_level1_map(TicTacTree#tictactree.level1), TicTacTree, 0). checktree(<<>>, TicTacTree, Counter) -> true = TicTacTree#tictactree.width == Counter; @@ -656,6 +711,7 @@ simple_test_withsize(Size) -> DL0 = find_dirtyleaves(Tree1, Tree0), ?assertMatch(true, lists:member(GetSegFun(K1), DL0)), DL1 = find_dirtyleaves(Tree3, Tree1), + ?assertMatch(DL1, lists:sort(DL1)), ?assertMatch(true, lists:member(GetSegFun(K2), DL1)), ?assertMatch(true, lists:member(GetSegFun(K3), DL1)), ?assertMatch(false, lists:member(GetSegFun(K1), DL1)), @@ -665,6 +721,53 @@ simple_test_withsize(Size) -> ImpTree3 = import_tree(ExpTree3), ?assertMatch(DL1, find_dirtyleaves(ImpTree3, Tree1)). 
+dirtyleaves_sorted_test() -> + Tree0 = new_tree(test, large), + KVL1 = + lists:map( + fun(I) -> + {{o, to_bucket(I rem 8), to_key(I), null}, + {is_hash, erlang:phash2(integer_to_binary(I))}} + end, + lists:seq(1, 50000) + ), + KVL2 = + lists:map( + fun(I) -> + {{o, to_bucket(I rem 8), to_key(I), null}, + {is_hash, erlang:phash2(integer_to_binary(I))}} + end, + lists:seq(100000, 150000) + ), + Tree1 = + lists:foldl( + fun({K, V}, Acc) -> + add_kv(Acc, K, V, fun(K0, V0) -> {element(3, K0), V0} end) + end, + Tree0, + KVL1 + ), + Tree2 = + lists:foldl( + fun({K, V}, Acc) -> + add_kv(Acc, K, V, fun(K0, V0) -> {element(3, K0), V0} end) + end, + Tree0, + KVL2 + ), + SW0 = os:system_time(millisecond), + DL1 = find_dirtyleaves(Tree1, Tree2), + DL2 = find_dirtyleaves(Tree2, Tree1), + io:format( + user, + "Finding approx 100K dirty leaves twice in ~w milliseconds~n", + [os:system_time(millisecond) - SW0] + ), + ?assertMatch(DL1, lists:sort(DL1)), + ?assertMatch(DL2, lists:sort(DL2)), + ?assertMatch(DL1, DL2). + + merge_bysize_small_test() -> merge_test_withsize(small). @@ -870,6 +973,70 @@ find_dirtysegments_withanemptytree_test() -> ?assertMatch(ExpectedAnswer, find_dirtysegments(fetch_root(T3), <<>>)). +tictac_perf_test_() -> + {timeout, 120, fun tictac_perf_tester_multi/0}. + +tictac_perf_tester_multi() -> + tictac_perf_tester(1000000, large), + tictac_perf_tester(40000, small). 
+ +tictac_perf_tester(KeyCount, TreeSize) -> + io:format(user, "Testing with Tree Size ~w~n", [TreeSize]), + io:format(user, "Generating ~w Keys and Hashes~n", [KeyCount]), + SW0 = os:system_time(millisecond), + KVL = + lists:map( + fun(I) -> + {{o, to_bucket(I rem 8), to_key(I), null}, + {is_hash, erlang:phash2(integer_to_binary(I))}} + end, + lists:seq(1, KeyCount) + ), + + SW1 = os:system_time(millisecond), + io:format(user, "Generating Keys took ~w milliseconds~n", [SW1 - SW0]), + + Tree = new_tree(test, TreeSize), + log_memory_footprint(), + + SW2 = os:system_time(millisecond), + io:format(user, "Generating new tree took ~w milliseconds~n", [SW2 - SW1]), + + UpdTree = + lists:foldl( + fun({K, V}, Acc) -> + add_kv(Acc, K, V, fun(K0, V0) -> {element(3, K0), V0} end) + end, + Tree, + KVL + ), + + SW3 = os:system_time(millisecond), + io:format(user, "Loading tree took ~w milliseconds~n", [SW3 - SW2]), + log_memory_footprint(), + + ExportedTree = export_tree(UpdTree), + + SW4 = os:system_time(millisecond), + io:format(user, "Exporting tree took ~w milliseconds~n", [SW4 - SW3]), + + ImportedTree = import_tree(ExportedTree), + + SW5 = os:system_time(millisecond), + io:format(user, "Importing tree took ~w milliseconds~n", [SW5 - SW4]), + + log_memory_footprint(), + + ?assertMatch([], find_dirtyleaves(UpdTree, ImportedTree)). + +to_key(N) -> + list_to_binary(io_lib:format("K~8..0B", [N])). + +to_bucket(N) -> + list_to_binary(io_lib:format("B~8..0B", [N])). + +log_memory_footprint() -> + io:format(user, "Memory footprint ~0p~n", [erlang:memory()]). -endif.