diff --git a/rebar.config b/rebar.config index 43c930b3..f0b17482 100644 --- a/rebar.config +++ b/rebar.config @@ -22,7 +22,10 @@ {plugins, [rebar_eqc]} ]}, {test, [{extra_src_dirs, ["test/end_to_end", "test/property"]} - ]} + ]}, + {perf_full, [{erl_opts, [{d, performance, riak_fullperf}]}]}, + {perf_mini, [{erl_opts, [{d, performance, riak_miniperf}]}]}, + {perf_prof, [{erl_opts, [{d, performance, riak_profileperf}]}]} ]}. {deps, [ diff --git a/src/leveled_penciller.erl b/src/leveled_penciller.erl index eeb358fa..4c9e7cbb 100644 --- a/src/leveled_penciller.erl +++ b/src/leveled_penciller.erl @@ -159,7 +159,7 @@ -behaviour(gen_server). --include("include/leveled.hrl"). +-include("leveled.hrl"). -export([ init/1, @@ -207,23 +207,17 @@ -export([clean_testdir/1]). -endif. --define(MAX_WORK_WAIT, 300). -define(MANIFEST_FP, "ledger_manifest"). -define(FILES_FP, "ledger_files"). --define(CURRENT_FILEX, "crr"). --define(PENDING_FILEX, "pnd"). -define(SST_FILEX, ".sst"). -define(ARCHIVE_FILEX, ".bak"). -define(SUPER_MAX_TABLE_SIZE, 40000). --define(PROMPT_WAIT_ONL0, 5). -define(WORKQUEUE_BACKLOG_TOLERANCE, 4). -define(COIN_SIDECOUNT, 4). -define(SLOW_FETCH, 500000). % Log a very slow fetch - longer than 500ms -define(FOLD_SCANWIDTH, 32). -define(ITERATOR_SCANWIDTH, 4). -define(ITERATOR_MINSCANWIDTH, 1). --define(TIMING_SAMPLECOUNTDOWN, 10000). --define(TIMING_SAMPLESIZE, 100). -define(SHUTDOWN_LOOPS, 10). -define(SHUTDOWN_PAUSE, 10000). % How long to wait for snapshots to be released on shutdown diff --git a/src/leveled_tictac.erl b/src/leveled_tictac.erl index e84c46a3..d4b07985 100644 --- a/src/leveled_tictac.erl +++ b/src/leveled_tictac.erl @@ -52,8 +52,6 @@ -module(leveled_tictac). --include("include/leveled.hrl"). - -export([ new_tree/1, new_tree/2, @@ -102,10 +100,12 @@ size :: tree_size(), width :: integer(), segment_count :: integer(), - level1 :: binary(), - level2 :: any() % an array - but OTP compatibility + level1 :: level1_map(), + level2 :: array:array() }). 
+-type level1_map() :: #{non_neg_integer() => binary()}. + -type tictactree() :: #tictactree{}. -type segment48() :: @@ -114,6 +114,11 @@ {binary(), integer(), integer(), integer(), binary()}. -type tree_size() :: xxsmall|xsmall|small|medium|large|xlarge. +-type bin_extract_fun() + :: + fun((term(), term()) -> + {binary(), binary()|{is_hash, non_neg_integer()}} + ). -export_type([tictactree/0, segment48/0, tree_size/0]). @@ -137,7 +142,7 @@ new_tree(TreeID) -> new_tree(TreeID, Size) -> Width = get_size(Size), Lv1Width = Width * ?HASH_SIZE * 8, - Lv1Init = <<0:Lv1Width/integer>>, + Lv1Init = to_level1_map(<<0:Lv1Width/integer>>), Lv2Init = array:new([{size, Width}, {default, ?EMPTY}]), #tictactree{treeID = TreeID, size = Size, @@ -159,9 +164,16 @@ export_tree(Tree) -> L2 = lists:foldl(EncodeL2Fun, [], lists:seq(0, Tree#tictactree.width - 1)), {struct, - [{<<"level1">>, base64:encode_to_string(Tree#tictactree.level1)}, - {<<"level2">>, {struct, lists:reverse(L2)}} - ]}. + [{<<"level1">>, + base64:encode_to_string( + from_level1_map(Tree#tictactree.level1) + ) + }, + {<<"level2">>, + {struct, lists:reverse(L2)} + } + ] + }. -spec import_tree({struct, list()}) -> tictactree(). %% @doc @@ -174,8 +186,9 @@ import_tree(ExportedTree) -> Sizes = lists:map(fun(SizeTag) -> {SizeTag, get_size(SizeTag)} end, ?VALID_SIZES), Width = byte_size(L1Bin) div ?HASH_SIZE, - {Size, Width} = lists:keyfind(Width, 2, Sizes), - Width = get_size(Size), + {Size, _Width} = lists:keyfind(Width, 2, Sizes), + %% assert that side is indeed the provided width + true = get_size(Size) == Width, Lv2Init = array:new([{size, Width}]), FoldFun = fun({X, EncodedL2SegBin}, L2Array) -> @@ -183,15 +196,18 @@ import_tree(ExportedTree) -> array:set(binary_to_integer(X), L2SegBin, L2Array) end, Lv2 = lists:foldl(FoldFun, Lv2Init, L2List), - #tictactree{treeID = import, - size = Size, - width = Width, - segment_count = Width * ?L2_CHUNKSIZE, - level1 = L1Bin, - level2 = Lv2}. 
- - --spec add_kv(tictactree(), term(), term(), fun()) -> tictactree(). + garbage_collect(), + #tictactree{ + treeID = import, + size = Size, + width = Width, + segment_count = Width * ?L2_CHUNKSIZE, + level1 = to_level1_map(L1Bin), + level2 = Lv2 + }. + + +-spec add_kv(tictactree(), term(), term(), bin_extract_fun()) -> tictactree(). %% @doc %% Add a Key and value to a tictactree using the BinExtractFun to extract a %% binary from the Key and value from which to generate the hash. The @@ -200,8 +216,9 @@ import_tree(ExportedTree) -> add_kv(TicTacTree, Key, Value, BinExtractFun) -> add_kv(TicTacTree, Key, Value, BinExtractFun, false). --spec add_kv(tictactree(), term(), term(), fun(), boolean()) - -> tictactree()|{tictactree(), integer()}. +-spec add_kv( + tictactree(), term(), term(), bin_extract_fun(), boolean()) + -> tictactree()|{tictactree(), integer()}. %% @doc %% add_kv with ability to return segment ID of Key added add_kv(TicTacTree, Key, Value, BinExtractFun, ReturnSegment) -> @@ -215,14 +232,15 @@ add_kv(TicTacTree, Key, Value, BinExtractFun, ReturnSegment) -> SegLeaf2Upd = SegLeaf2 bxor SegChangeHash, SegLeaf1Upd = SegLeaf1 bxor SegChangeHash, + UpdatedTree = + replace_segment( + SegLeaf1Upd, SegLeaf2Upd, L1Extract, L2Extract, TicTacTree + ), case ReturnSegment of true -> - {replace_segment(SegLeaf1Upd, SegLeaf2Upd, - L1Extract, L2Extract, TicTacTree), - Segment}; + {UpdatedTree, Segment}; false -> - replace_segment(SegLeaf1Upd, SegLeaf2Upd, - L1Extract, L2Extract, TicTacTree) + UpdatedTree end. -spec alter_segment(integer(), integer(), tictactree()) -> tictactree(). @@ -241,8 +259,9 @@ alter_segment(Segment, Hash, Tree) -> %% Returns a list of segment IDs which hold differences between the state %% represented by the two trees. 
find_dirtyleaves(SrcTree, SnkTree) -> - Size = SrcTree#tictactree.size, - Size = SnkTree#tictactree.size, + SizeSrc = SrcTree#tictactree.size, + SizeSnk = SnkTree#tictactree.size, + true = SizeSrc == SizeSnk, IdxList = find_dirtysegments(fetch_root(SrcTree), fetch_root(SnkTree)), SrcLeaves = fetch_leaves(SrcTree, IdxList), @@ -250,12 +269,19 @@ find_dirtyleaves(SrcTree, SnkTree) -> FoldFun = fun(Idx, Acc) -> - {Idx, SrcLeaf} = lists:keyfind(Idx, 1, SrcLeaves), - {Idx, SnkLeaf} = lists:keyfind(Idx, 1, SnkLeaves), + {_, SrcLeaf} = lists:keyfind(Idx, 1, SrcLeaves), + {_, SnkLeaf} = lists:keyfind(Idx, 1, SnkLeaves), L2IdxList = segmentcompare(SrcLeaf, SnkLeaf), - Acc ++ lists:map(fun(X) -> X + Idx * ?L2_CHUNKSIZE end, L2IdxList) + lists:foldl( + fun(X, InnerAcc) -> + SegID = X + Idx * ?L2_CHUNKSIZE, + [SegID|InnerAcc] + end, + Acc, + L2IdxList) end, - lists:sort(lists:foldl(FoldFun, [], IdxList)). + %% Output not sorted, as sorted by the design of the construction process + lists:foldl(FoldFun, [], IdxList). -spec find_dirtysegments(binary(), binary()) -> list(integer()). %% @doc @@ -268,7 +294,7 @@ find_dirtysegments(SrcBin, SinkBin) -> %% @doc %% Return the level1 binary for a tree. fetch_root(TicTacTree) -> - TicTacTree#tictactree.level1. + from_level1_map(TicTacTree#tictactree.level1). -spec fetch_leaves(tictactree(), list(integer())) -> list(). %% @doc @@ -303,15 +329,21 @@ merge_trees(TreeA, TreeB) -> NewLevel2 = merge_binaries(L2A, L2B), array:set(SQN, NewLevel2, MergeL2) end, - NewLevel2 = lists:foldl(MergeFun, - MergedTree#tictactree.level2, - lists:seq(0, MergedTree#tictactree.width - 1)), + NewLevel2 = + lists:foldl( + MergeFun, + MergedTree#tictactree.level2, + lists:seq(0, MergedTree#tictactree.width - 1) + ), - MergedTree#tictactree{level1 = NewLevel1, level2 = NewLevel2}. - --spec get_segment(integer(), - integer()|xxsmall|xsmall|small|medium|large|xlarge) -> - integer(). 
+ MergedTree#tictactree{ + level1 = to_level1_map(NewLevel1), + level2 = NewLevel2 + }. + +-spec get_segment( + integer(), + integer()|xxsmall|xsmall|small|medium|large|xlarge) -> integer(). %% @doc %% Return the segment ID for a Key. Can pass the tree size or the actual %% segment count derived from the size @@ -339,8 +371,8 @@ tictac_hash(BinKey, Val) when is_binary(BinKey) -> end, {HashKeyToSeg, AltHashKey bxor HashVal}. --spec keyto_doublesegment32(binary()) - -> {non_neg_integer(), non_neg_integer()}. +-spec keyto_doublesegment32( + binary()) -> {non_neg_integer(), non_neg_integer()}. %% @doc %% Used in tictac_hash/2 to provide an alternative hash of the key to bxor with %% the value, as well as the segment hash to locate the leaf of the tree to be @@ -372,8 +404,8 @@ keyto_segment48(BinKey) -> _Rest/binary>> = crypto:hash(md5, BinKey), {segment_hash, SegmentID, ExtraHash, AltHash}. --spec generate_segmentfilter_list(list(integer()), tree_size()) - -> false|list(integer()). +-spec generate_segmentfilter_list( + list(integer()), tree_size()) -> false|list(integer()). %% @doc %% Cannot accelerate segment listing for trees below certain sizes, so check %% the creation of segment filter lists with this function @@ -398,8 +430,8 @@ generate_segmentfilter_list(SegmentList, Size) -> SegmentList end. --spec adjust_segmentmatch_list(list(integer()), tree_size(), tree_size()) - -> list(integer()). +-spec adjust_segmentmatch_list( + list(integer()), tree_size(), tree_size()) -> list(integer()). %% @doc %% If we have dirty segments discovered by comparing trees of size CompareSize, %% and we want to see if it matches a segment for a key which was created for a @@ -441,8 +473,8 @@ adjust_segmentmatch_list(SegmentList, CompareSize, StoreSize) -> end. --spec match_segment({integer(), tree_size()}, {integer(), tree_size()}) - -> boolean(). +-spec match_segment( + {integer(), tree_size()}, {integer(), tree_size()}) -> boolean(). 
 %% @doc
 %% Does segment A match segment B - given that segment A was generated using
 %% Tree size A and segment B was generated using Tree Size B
@@ -462,8 +494,29 @@ join_segment(BranchID, LeafID) ->
 %%% Internal functions
 %%%============================================================================
 
--spec extract_segment(integer(), tictactree()) ->
-    {integer(), integer(), tree_extract(), tree_extract()}.
+-spec to_level1_map(binary()) -> level1_map().
+to_level1_map(L1Bin) ->
+    to_level1_map_loop(L1Bin, maps:new(), 0).
+
+to_level1_map_loop(<<>>, L1MapAcc, _Idx) ->
+    L1MapAcc;
+to_level1_map_loop(<<Slice:(16 * ?HASH_SIZE)/binary, Rest/binary>>, L1MapAcc, Idx) ->
+    to_level1_map_loop(Rest, maps:put(Idx, Slice, L1MapAcc), Idx + 1).
+
+
+-spec from_level1_map(level1_map()) -> binary().
+from_level1_map(L1Map) ->
+    lists:foldl(
+        fun(I, Acc) ->
+            <<Acc/binary, (maps:get(I, L1Map))/binary>>
+        end,
+        <<>>,
+        lists:seq(0, maps:size(L1Map) - 1)
+    ).
+
+-spec extract_segment(
+    integer(), tictactree()) ->
+        {integer(), integer(), tree_extract(), tree_extract()}.
 %% @doc
 %% Extract the Level 1 and Level 2 slices from a tree to prepare an update
 extract_segment(Segment, TicTacTree) ->
@@ -472,9 +525,10 @@ extract_segment(Segment, TicTacTree) ->
     Level1Pos =
         (Segment bsr ?L2_BITSIZE)
             band (TicTacTree#tictactree.width - 1),
+    Level1Slice = Level1Pos div 16,
 
     Level2BytePos = ?HASH_SIZE * Level2Pos,
-    Level1BytePos = ?HASH_SIZE * Level1Pos,
+    Level1BytePos = ?HASH_SIZE * (Level1Pos rem 16),
 
     Level2 = get_level2(TicTacTree, Level1Pos),
 
@@ -484,7 +538,7 @@ extract_segment(Segment, TicTacTree) ->
     <<PreL2:Level2BytePos/binary,
        SegLeaf2:HashIntLength/integer,
        PostL2/binary>> = Level2,
     <<PreL1:Level1BytePos/binary,
        SegLeaf1:HashIntLength/integer,
-       PostL1/binary>> = TicTacTree#tictactree.level1,
+       PostL1/binary>> = maps:get(Level1Slice, TicTacTree#tictactree.level1),
 
     {SegLeaf1,
        SegLeaf2,
@@ -492,25 +546,26 @@ extract_segment(Segment, TicTacTree) ->
        {PreL1, Level1BytePos, Level1Pos, HashIntLength, PostL1},
        {PreL2, Level2BytePos, Level2Pos, HashIntLength, PostL2}}.
 
--spec replace_segment(integer(), integer(),
-                      tree_extract(), tree_extract(),
-                      tictactree()) -> tictactree().
+-spec replace_segment(
+    integer(), integer(), tree_extract(), tree_extract(), tictactree()) ->
+        tictactree().
 %% @doc
 %% Replace a slice of a tree
 replace_segment(L1Hash, L2Hash, L1Extract, L2Extract, TicTacTree) ->
     {PreL1, Level1BytePos, Level1Pos, HashIntLength, PostL1} = L1Extract,
     {PreL2, Level2BytePos, _Level2Pos, HashIntLength, PostL2} = L2Extract,
+    Level1Slice = Level1Pos div 16,
+
     Level1Upd = <<PreL1/binary,
                    L1Hash:HashIntLength/integer,
                    PostL1/binary>>,
     Level2Upd = <<PreL2/binary,
                    L2Hash:HashIntLength/integer,
                    PostL2/binary>>,
-    TicTacTree#tictactree{level1 = Level1Upd,
-                            level2 = array:set(Level1Pos,
-                                                Level2Upd,
-                                                TicTacTree#tictactree.level2)}.
+    TicTacTree#tictactree{
+        level1 = maps:put(Level1Slice, Level1Upd, TicTacTree#tictactree.level1),
+        level2 = array:set(Level1Pos, Level2Upd, TicTacTree#tictactree.level2)}.
 
 get_level2(TicTacTree, L1Pos) ->
     case array:get(L1Pos, TicTacTree#tictactree.level2) of
@@ -553,7 +608,7 @@ segmentcompare(SrcBin, SnkBin, Acc, Counter) ->
     <<SrcHash:?HASH_SIZE/binary, SrcTail/binary>> = SrcBin,
     <<SnkHash:?HASH_SIZE/binary, SnkTail/binary>> = SnkBin,
     case SrcHash of
-        SnkHash ->
+        H when H == SnkHash ->
             segmentcompare(SrcTail, SnkTail, Acc, Counter + 1);
         _ ->
             segmentcompare(SrcTail, SnkTail, [Counter|Acc], Counter + 1)
@@ -576,7 +631,7 @@ merge_binaries(BinA, BinB) ->
 -include_lib("eunit/include/eunit.hrl").
 
 checktree(TicTacTree) ->
-    checktree(TicTacTree#tictactree.level1, TicTacTree, 0).
+    checktree(from_level1_map(TicTacTree#tictactree.level1), TicTacTree, 0).
 
 checktree(<<>>, TicTacTree, Counter) ->
     true = TicTacTree#tictactree.width == Counter;
@@ -656,6 +711,7 @@ simple_test_withsize(Size) ->
     DL0 = find_dirtyleaves(Tree1, Tree0),
     ?assertMatch(true, lists:member(GetSegFun(K1), DL0)),
     DL1 = find_dirtyleaves(Tree3, Tree1),
+    ?assertMatch(DL1, lists:sort(DL1)),
     ?assertMatch(true, lists:member(GetSegFun(K2), DL1)),
     ?assertMatch(true, lists:member(GetSegFun(K3), DL1)),
     ?assertMatch(false, lists:member(GetSegFun(K1), DL1)),
@@ -665,6 +721,53 @@ simple_test_withsize(Size) ->
     ImpTree3 = import_tree(ExpTree3),
     ?assertMatch(DL1, find_dirtyleaves(ImpTree3, Tree1)).
+dirtyleaves_sorted_test() -> + Tree0 = new_tree(test, large), + KVL1 = + lists:map( + fun(I) -> + {{o, to_bucket(I rem 8), to_key(I), null}, + {is_hash, erlang:phash2(integer_to_binary(I))}} + end, + lists:seq(1, 50000) + ), + KVL2 = + lists:map( + fun(I) -> + {{o, to_bucket(I rem 8), to_key(I), null}, + {is_hash, erlang:phash2(integer_to_binary(I))}} + end, + lists:seq(100000, 150000) + ), + Tree1 = + lists:foldl( + fun({K, V}, Acc) -> + add_kv(Acc, K, V, fun(K0, V0) -> {element(3, K0), V0} end) + end, + Tree0, + KVL1 + ), + Tree2 = + lists:foldl( + fun({K, V}, Acc) -> + add_kv(Acc, K, V, fun(K0, V0) -> {element(3, K0), V0} end) + end, + Tree0, + KVL2 + ), + SW0 = os:system_time(millisecond), + DL1 = find_dirtyleaves(Tree1, Tree2), + DL2 = find_dirtyleaves(Tree2, Tree1), + io:format( + user, + "Finding approx 100K dirty leaves twice in ~w milliseconds~n", + [os:system_time(millisecond) - SW0] + ), + ?assertMatch(DL1, lists:sort(DL1)), + ?assertMatch(DL2, lists:sort(DL2)), + ?assertMatch(DL1, DL2). + + merge_bysize_small_test() -> merge_test_withsize(small). @@ -870,6 +973,70 @@ find_dirtysegments_withanemptytree_test() -> ?assertMatch(ExpectedAnswer, find_dirtysegments(fetch_root(T3), <<>>)). +tictac_perf_test_() -> + {timeout, 120, fun tictac_perf_tester_multi/0}. + +tictac_perf_tester_multi() -> + tictac_perf_tester(1000000, large), + tictac_perf_tester(40000, small). 
+ +tictac_perf_tester(KeyCount, TreeSize) -> + io:format(user, "Testing with Tree Size ~w~n", [TreeSize]), + io:format(user, "Generating ~w Keys and Hashes~n", [KeyCount]), + SW0 = os:system_time(millisecond), + KVL = + lists:map( + fun(I) -> + {{o, to_bucket(I rem 8), to_key(I), null}, + {is_hash, erlang:phash2(integer_to_binary(I))}} + end, + lists:seq(1, KeyCount) + ), + + SW1 = os:system_time(millisecond), + io:format(user, "Generating Keys took ~w milliseconds~n", [SW1 - SW0]), + + Tree = new_tree(test, TreeSize), + log_memory_footprint(), + + SW2 = os:system_time(millisecond), + io:format(user, "Generating new tree took ~w milliseconds~n", [SW2 - SW1]), + + UpdTree = + lists:foldl( + fun({K, V}, Acc) -> + add_kv(Acc, K, V, fun(K0, V0) -> {element(3, K0), V0} end) + end, + Tree, + KVL + ), + + SW3 = os:system_time(millisecond), + io:format(user, "Loading tree took ~w milliseconds~n", [SW3 - SW2]), + log_memory_footprint(), + + ExportedTree = export_tree(UpdTree), + + SW4 = os:system_time(millisecond), + io:format(user, "Exporting tree took ~w milliseconds~n", [SW4 - SW3]), + + ImportedTree = import_tree(ExportedTree), + + SW5 = os:system_time(millisecond), + io:format(user, "Importing tree took ~w milliseconds~n", [SW5 - SW4]), + + log_memory_footprint(), + + ?assertMatch([], find_dirtyleaves(UpdTree, ImportedTree)). + +to_key(N) -> + list_to_binary(io_lib:format("K~8..0B", [N])). + +to_bucket(N) -> + list_to_binary(io_lib:format("B~8..0B", [N])). + +log_memory_footprint() -> + io:format(user, "Memory footprint ~0p~n", [erlang:memory()]). -endif. 
diff --git a/test/end_to_end/iterator_SUITE.erl b/test/end_to_end/iterator_SUITE.erl index 217d209f..ef5efdf0 100644 --- a/test/end_to_end/iterator_SUITE.erl +++ b/test/end_to_end/iterator_SUITE.erl @@ -213,11 +213,12 @@ breaking_folds(_Config) -> % Find all keys index, and then same again but stop at a midpoint using a % throw {async, IdxFolder} = - leveled_bookie:book_indexfold(Bookie1, - list_to_binary("Bucket"), - {fun testutil:foldkeysfun/3, []}, - {"idx1_bin", "#", "|"}, - {true, undefined}), + leveled_bookie:book_indexfold( + Bookie1, + list_to_binary("Bucket"), + {fun testutil:foldkeysfun/3, []}, + {<<"idx1_bin">>, <<"#">>, <<"|">>}, + {true, undefined}), KeyList1 = lists:reverse(IdxFolder()), io:format("Index fold with result size ~w~n", [length(KeyList1)]), true = KeyCount == length(KeyList1), @@ -235,11 +236,12 @@ breaking_folds(_Config) -> end end, {async, IdxFolderToMidK} = - leveled_bookie:book_indexfold(Bookie1, - list_to_binary("Bucket"), - {FoldKeyThrowFun, []}, - {"idx1_bin", "#", "|"}, - {true, undefined}), + leveled_bookie:book_indexfold( + Bookie1, + list_to_binary("Bucket"), + {FoldKeyThrowFun, []}, + {<<"idx1_bin">>, <<"#">>, <<"|">>}, + {true, undefined}), CatchingFold = fun(AsyncFolder) -> try @@ -261,10 +263,8 @@ breaking_folds(_Config) -> [{K, Size}|Acc] end, {async, HeadFolder} = - leveled_bookie:book_headfold(Bookie1, - ?RIAK_TAG, - {HeadFoldFun, []}, - true, true, false), + leveled_bookie:book_headfold( + Bookie1, ?RIAK_TAG, {HeadFoldFun, []}, true, true, false), KeySizeList1 = lists:reverse(HeadFolder()), io:format("Head fold with result size ~w~n", [length(KeySizeList1)]), true = KeyCount == length(KeySizeList1), @@ -472,11 +472,9 @@ small_load_with2i(_Config) -> testutil:check_forobject(Bookie1, TestObject), ObjectGen = testutil:get_compressiblevalue_andinteger(), IndexGen = testutil:get_randomindexes_generator(8), - ObjL1 = testutil:generate_objects(10000, - uuid, - [], - ObjectGen, - IndexGen), + ObjL1 = + 
testutil:generate_objects( + 10000, uuid, [], ObjectGen, IndexGen), testutil:riakload(Bookie1, ObjL1), ChkList1 = lists:sublist(lists:sort(ObjL1), 100), testutil:check_forlist(Bookie1, ChkList1), @@ -486,7 +484,7 @@ small_load_with2i(_Config) -> IdxQ1 = {index_query, "Bucket", {fun testutil:foldkeysfun/3, []}, - {"idx1_bin", "#", "|"}, + {<<"idx1_bin">>, <<"#">>, <<"|">>}, {true, undefined}}, {async, IdxFolder} = leveled_bookie:book_returnfolder(Bookie1, IdxQ1), KeyList1 = lists:usort(IdxFolder()), @@ -495,7 +493,7 @@ small_load_with2i(_Config) -> IdxQ2 = {index_query, {"Bucket", LastKey}, {fun testutil:foldkeysfun/3, []}, - {"idx1_bin", LastTerm, "|"}, + {<<"idx1_bin">>, LastTerm, <<"|">>}, {false, undefined}}, {async, IdxFolderLK} = leveled_bookie:book_returnfolder(Bookie1, IdxQ2), KeyList2 = lists:usort(IdxFolderLK()), @@ -530,12 +528,14 @@ small_load_with2i(_Config) -> {FoldObjectsFun, []}, false), KeyHashList2 = HTreeF2(), - {async, HTreeF3} = leveled_bookie:book_objectfold(Bookie1, - ?RIAK_TAG, - "Bucket", - {"idx1_bin", "#", "|"}, - {FoldObjectsFun, []}, - false), + {async, HTreeF3} = + leveled_bookie:book_objectfold( + Bookie1, + ?RIAK_TAG, + "Bucket", + {<<"idx1_bin">>, <<"#">>, <<"|">>}, + {FoldObjectsFun, []}, + false), KeyHashList3 = HTreeF3(), true = 9901 == length(KeyHashList1), % also includes the test object true = 9900 == length(KeyHashList2), @@ -585,96 +585,86 @@ small_load_with2i(_Config) -> query_count(_Config) -> RootPath = testutil:reset_filestructure(), - {ok, Book1} = leveled_bookie:book_start(RootPath, - 2000, - 50000000, - testutil:sync_strategy()), + {ok, Book1} = + leveled_bookie:book_start( + RootPath, 2000, 50000000, testutil:sync_strategy()), BucketBin = list_to_binary("Bucket"), - {TestObject, TestSpec} = testutil:generate_testobject(BucketBin, - term_to_binary("Key1"), - "Value1", - [], - [{"MDK1", "MDV1"}]), + {TestObject, TestSpec} = + testutil:generate_testobject( + BucketBin, term_to_binary("Key1"), "Value1", [], [{"MDK1", 
"MDV1"}]), ok = testutil:book_riakput(Book1, TestObject, TestSpec), testutil:check_forobject(Book1, TestObject), testutil:check_formissingobject(Book1, "Bucket1", "Key2"), testutil:check_forobject(Book1, TestObject), - lists:foreach(fun(_X) -> - V = testutil:get_compressiblevalue(), - Indexes = testutil:get_randomindexes_generator(8), - SW = os:timestamp(), - ObjL1 = testutil:generate_objects(10000, - binary_uuid, - [], - V, - Indexes), - testutil:riakload(Book1, ObjL1), - io:format("Put of 10000 objects with 8 index entries " - ++ - "each completed in ~w microseconds~n", - [timer:now_diff(os:timestamp(), SW)]) - end, - lists:seq(1, 8)), + lists:foreach( + fun(_X) -> + V = testutil:get_compressiblevalue(), + Indexes = testutil:get_randomindexes_generator(8), + SW = os:timestamp(), + ObjL1 = testutil:generate_objects(10000, + binary_uuid, + [], + V, + Indexes), + testutil:riakload(Book1, ObjL1), + io:format( + "Put of 10000 objects with 8 index entries " + "each completed in ~w microseconds~n", + [timer:now_diff(os:timestamp(), SW)]) + end, + lists:seq(1, 8)), testutil:check_forobject(Book1, TestObject), - Total = lists:foldl(fun(X, Acc) -> - IdxF = "idx" ++ integer_to_list(X) ++ "_bin", - T = count_termsonindex(BucketBin, - IdxF, - Book1, - ?KEY_ONLY), - io:format("~w terms found on index ~s~n", - [T, IdxF]), - Acc + T - end, - 0, - lists:seq(1, 8)), - ok = case Total of - 640000 -> - ok + Total = + lists:foldl( + fun(X, Acc) -> + IdxF = "idx" ++ integer_to_list(X) ++ "_bin", + T = + count_termsonindex( + BucketBin, list_to_binary(IdxF), Book1, ?KEY_ONLY), + io:format("~w terms found on index ~s~n", [T, IdxF]), + Acc + T end, - Index1Count = count_termsonindex(BucketBin, - "idx1_bin", - Book1, - ?KEY_ONLY), + 0, + lists:seq(1, 8)), + true = Total == 640000, + Index1Count = + count_termsonindex( + BucketBin, <<"idx1_bin">>, Book1, ?KEY_ONLY), ok = leveled_bookie:book_close(Book1), - {ok, Book2} = leveled_bookie:book_start(RootPath, - 1000, - 50000000, - 
testutil:sync_strategy()), - Index1Count = count_termsonindex(BucketBin, - "idx1_bin", - Book2, - ?KEY_ONLY), + {ok, Book2} = + leveled_bookie:book_start( + RootPath, 1000, 50000000, testutil:sync_strategy()), + Index1Count = + count_termsonindex( + BucketBin, <<"idx1_bin">>, Book2, ?KEY_ONLY), NameList = testutil:name_list(), - TotalNameByName = lists:foldl(fun({_X, Name}, Acc) -> - {ok, Regex} = re:compile("[0-9]+" ++ - Name), - SW = os:timestamp(), - T = count_termsonindex(BucketBin, - "idx1_bin", - Book2, - {false, - Regex}), - TD = timer:now_diff(os:timestamp(), - SW), - io:format("~w terms found on " ++ - "index idx1 with a " ++ - "regex in ~w " ++ - "microseconds~n", - [T, TD]), - Acc + T - end, - 0, - NameList), - ok = case TotalNameByName of - Index1Count -> - ok + TotalNameByName = + lists:foldl( + fun({_X, Name}, Acc) -> + {ok, Regex} = + re:compile("[0-9]+" ++ Name), + SW = os:timestamp(), + T = + count_termsonindex( + BucketBin, + list_to_binary("idx1_bin"), + Book2, + {false, Regex}), + TD = timer:now_diff(os:timestamp(), SW), + io:format( + "~w terms found on index idx1 with a " + "regex in ~w microseconds~n", + [T, TD]), + Acc + T end, + 0, + NameList), + true = TotalNameByName == Index1Count, {ok, RegMia} = re:compile("[0-9]+Mia"), Query1 = {index_query, BucketBin, {fun testutil:foldkeysfun/3, []}, - {"idx2_bin", "2000", "2000|"}, + {<<"idx2_bin">>, <<"2000">>, <<"2000|">>}, {false, RegMia}}, {async, Mia2KFolder1} = leveled_bookie:book_returnfolder(Book2, Query1), @@ -682,7 +672,7 @@ query_count(_Config) -> Query2 = {index_query, BucketBin, {fun testutil:foldkeysfun/3, []}, - {"idx2_bin", "2000", "2001"}, + {<<"idx2_bin">>, <<"2000">>, <<"2001">>}, {true, undefined}}, {async, Mia2KFolder2} = leveled_bookie:book_returnfolder(Book2, Query2), @@ -705,7 +695,7 @@ query_count(_Config) -> Query3 = {index_query, BucketBin, {fun testutil:foldkeysfun/3, []}, - {"idx2_bin", "1980", "2100"}, + {<<"idx2_bin">>, <<"1980">>, <<"2100">>}, {false, RxMia2K}}, 
{async, Mia2KFolder3} = leveled_bookie:book_returnfolder(Book2, Query3), @@ -713,26 +703,26 @@ query_count(_Config) -> V9 = testutil:get_compressiblevalue(), Indexes9 = testutil:get_randomindexes_generator(8), - [{_RN, Obj9, Spc9}] = testutil:generate_objects(1, - binary_uuid, - [], - V9, - Indexes9), + [{_RN, Obj9, Spc9}] = + testutil:generate_objects( + 1, binary_uuid, [], V9, Indexes9), ok = testutil:book_riakput(Book2, Obj9, Spc9), - R9 = lists:map(fun({add, IdxF, IdxT}) -> - Q = {index_query, - BucketBin, - {fun testutil:foldkeysfun/3, []}, - {IdxF, IdxT, IdxT}, - ?KEY_ONLY}, - R = leveled_bookie:book_returnfolder(Book2, Q), - {async, Fldr} = R, - case length(Fldr()) of - X when X > 0 -> - {IdxF, IdxT, X} - end - end, - Spc9), + R9 = + lists:map( + fun({add, IdxF, IdxT}) -> + Q = {index_query, + BucketBin, + {fun testutil:foldkeysfun/3, []}, + {IdxF, IdxT, IdxT}, + ?KEY_ONLY}, + R = leveled_bookie:book_returnfolder(Book2, Q), + {async, Fldr} = R, + case length(Fldr()) of + X when X > 0 -> + {IdxF, IdxT, X} + end + end, + Spc9), Spc9Del = lists:map(fun({add, IdxF, IdxT}) -> {remove, IdxF, IdxT} end, Spc9), ok = testutil:book_riakput(Book2, Obj9, Spc9Del), @@ -751,44 +741,44 @@ query_count(_Config) -> end, R9), ok = leveled_bookie:book_close(Book2), - {ok, Book3} = leveled_bookie:book_start(RootPath, - 2000, - 50000000, - testutil:sync_strategy()), - lists:foreach(fun({IdxF, IdxT, X}) -> - Q = {index_query, - BucketBin, - {fun testutil:foldkeysfun/3, []}, - {IdxF, IdxT, IdxT}, - ?KEY_ONLY}, - R = leveled_bookie:book_returnfolder(Book3, Q), - {async, Fldr} = R, - case length(Fldr()) of - Y -> - Y = X - 1 - end - end, - R9), + {ok, Book3} = + leveled_bookie:book_start( + RootPath, 2000, 50000000, testutil:sync_strategy()), + lists:foreach( + fun({IdxF, IdxT, X}) -> + Q = {index_query, + BucketBin, + {fun testutil:foldkeysfun/3, []}, + {IdxF, IdxT, IdxT}, + ?KEY_ONLY}, + R = leveled_bookie:book_returnfolder(Book3, Q), + {async, Fldr} = R, + case length(Fldr()) of + 
Y -> + Y = X - 1 + end + end, + R9), ok = testutil:book_riakput(Book3, Obj9, Spc9), ok = leveled_bookie:book_close(Book3), - {ok, Book4} = leveled_bookie:book_start(RootPath, - 2000, - 50000000, - testutil:sync_strategy()), - lists:foreach(fun({IdxF, IdxT, X}) -> - Q = {index_query, - BucketBin, - {fun testutil:foldkeysfun/3, []}, - {IdxF, IdxT, IdxT}, - ?KEY_ONLY}, - R = leveled_bookie:book_returnfolder(Book4, Q), - {async, Fldr} = R, - case length(Fldr()) of - X -> - ok - end - end, - R9), + {ok, Book4} = + leveled_bookie:book_start( + RootPath, 2000, 50000000, testutil:sync_strategy()), + lists:foreach( + fun({IdxF, IdxT, X}) -> + Q = {index_query, + BucketBin, + {fun testutil:foldkeysfun/3, []}, + {IdxF, IdxT, IdxT}, + ?KEY_ONLY}, + R = leveled_bookie:book_returnfolder(Book4, Q), + {async, Fldr} = R, + case length(Fldr()) of + X -> + ok + end + end, + R9), testutil:check_forobject(Book4, TestObject), FoldBucketsFun = fun(B, Acc) -> sets:add_element(B, Acc) end, @@ -803,24 +793,15 @@ query_count(_Config) -> true = sets:size(BucketSet1) == 1, - ObjList10A = testutil:generate_objects(5000, - binary_uuid, - [], - V9, - Indexes9, - "BucketA"), - ObjList10B = testutil:generate_objects(5000, - binary_uuid, - [], - V9, - Indexes9, - "BucketB"), - ObjList10C = testutil:generate_objects(5000, - binary_uuid, - [], - V9, - Indexes9, - "BucketC"), + ObjList10A = + testutil:generate_objects( + 5000, binary_uuid, [], V9, Indexes9, "BucketA"), + ObjList10B = + testutil:generate_objects( + 5000, binary_uuid, [], V9, Indexes9, "BucketB"), + ObjList10C = + testutil:generate_objects( + 5000, binary_uuid, [], V9, Indexes9, "BucketC"), testutil:riakload(Book4, ObjList10A), testutil:riakload(Book4, ObjList10B), testutil:riakload(Book4, ObjList10C), @@ -847,31 +828,30 @@ query_count(_Config) -> ok = leveled_bookie:book_close(Book5), testutil:reset_filestructure(). 
-
 count_termsonindex(Bucket, IdxField, Book, QType) ->
-    lists:foldl(fun(X, Acc) ->
-                        SW = os:timestamp(),
-                        ST = integer_to_list(X),
-                        ET = ST ++ "|",
-                        Q = {index_query,
-                                Bucket,
-                                {fun testutil:foldkeysfun/3, []},
-                                {IdxField, ST, ET},
-                                QType},
-                        R = leveled_bookie:book_returnfolder(Book, Q),
-                        {async, Folder} = R,
-                        Items = length(Folder()),
-                        io:format("2i query from term ~s on index ~s took " ++
-                                        "~w microseconds~n",
-                                    [ST,
-                                        IdxField,
-                                        timer:now_diff(os:timestamp(), SW)]),
-                        Acc + Items
-                    end,
-                    0,
-                    lists:seq(190, 221)).
+    lists:foldl(
+        fun(X, Acc) ->
+            SW = os:timestamp(),
+            ST = list_to_binary(integer_to_list(X)),
+            Pipe = <<"|">>,
+            ET = <<ST/binary, Pipe/binary>>,
+            Q = {index_query,
+                    Bucket,
+                    {fun testutil:foldkeysfun/3, []},
+                    {IdxField, ST, ET},
+                    QType},
+            R = leveled_bookie:book_returnfolder(Book, Q),
+            {async, Folder} = R,
+            Items = length(Folder()),
+            io:format(
+                "2i query from term ~s on index ~s took ~w microseconds~n",
+                [ST, IdxField, timer:now_diff(os:timestamp(), SW)]),
+            Acc + Items
+        end,
+        0,
+        lists:seq(190, 221)).
 
 multibucket_fold(_Config) ->
     RootPath = testutil:reset_filestructure(),
diff --git a/test/end_to_end/perf_SUITE.erl b/test/end_to_end/perf_SUITE.erl
index 9a3fabd1..98055830 100644
--- a/test/end_to_end/perf_SUITE.erl
+++ b/test/end_to_end/perf_SUITE.erl
@@ -3,10 +3,25 @@
 -define(INFO, info).
 -export([all/0, suite/0]).
 -export([
-    riak_ctperf/1, riak_fullperf/1, riak_profileperf/1
+    riak_ctperf/1, riak_fullperf/1, riak_profileperf/1, riak_miniperf/1
 ]).
 
-all() -> [riak_ctperf].
+-define(PEOPLE_INDEX, <<"people_bin">>).
+-define(MINI_QUERY_DIVISOR, 8).
+-define(RGEX_QUERY_DIVISOR, 32).
+
+-ifndef(performance).
+  -define(performance, riak_ctperf).
+-endif.
+all() -> [?performance].
+
+-if(?performance == riak_profileperf andalso ?OTP_RELEASE >= 24).
+  % Requires map functions from OTP 24
+  -define(ACCOUNTING, true).
+-else.
+  -define(ACCOUNTING, false).
+-endif.
+
 suite() -> [{timetrap, {hours, 16}}].
riak_fullperf(_Config) -> @@ -24,16 +39,16 @@ riak_fullperf(ObjSize, PM, LC) -> output_result(R5A), R5B = riak_load_tester(Bucket, 5000000, ObjSize, [], PM, LC), output_result(R5B), - R10 = riak_load_tester(Bucket, 10000000, ObjSize, [], PM, LC), + R10 = riak_load_tester(Bucket, 8000000, ObjSize, [], PM, LC), output_result(R10) . riak_profileperf(_Config) -> riak_load_tester( {<<"SensibleBucketTypeName">>, <<"SensibleBucketName0">>}, - 2000000, + 1200000, 2048, - [load, head, get, query, mini_query, full, guess, estimate, update], + [load, head, get, query, mini_query, regex_query, full, guess, estimate, update], zstd, as_store ). @@ -42,12 +57,18 @@ riak_profileperf(_Config) -> riak_ctperf(_Config) -> riak_load_tester(<<"B0">>, 400000, 1024, [], native, as_store). +riak_miniperf(_Config) -> + Bucket = {<<"SensibleBucketTypeName">>, <<"SensibleBucketName0">>}, + R2A = riak_load_tester(Bucket, 2000000, 2048, [], zstd, as_store), + output_result(R2A). + riak_load_tester(Bucket, KeyCount, ObjSize, ProfileList, PM, LC) -> ct:log( ?INFO, "Basic riak test with KeyCount ~w ObjSize ~w PressMethod ~w Ledger ~w", [KeyCount, ObjSize, PM, LC] ), + IndexCount = 100000, GetFetches = KeyCount div 4, @@ -74,6 +95,7 @@ riak_load_tester(Bucket, KeyCount, ObjSize, ProfileList, PM, LC) -> IntIndex = "integer" ++ integer_to_list(ListID) ++ "_int", BinIndex = "binary" ++ integer_to_list(ListID) ++ "_bin", [{add, list_to_binary(IntIndex), RandInt}, + {add, ?PEOPLE_INDEX, list_to_binary(random_people_index())}, {add, list_to_binary(IntIndex), RandInt + 1}, {add, list_to_binary(BinIndex), <>}, {add, list_to_binary(BinIndex), <<(RandInt + 1):32/integer>>}] @@ -82,6 +104,8 @@ riak_load_tester(Bucket, KeyCount, ObjSize, ProfileList, PM, LC) -> CountPerList = KeyCount div 10, + LoadMemoryTracker = memory_tracking(load, 1000), + LoadAccountant = accounting(load, 10000, ProfileList), TC4 = load_chunk(Bookie1, CountPerList, ObjSize, IndexGenFun, Bucket, 4), TC1 = load_chunk(Bookie1, CountPerList, 
ObjSize, IndexGenFun, Bucket, 1), TC9 = load_chunk(Bookie1, CountPerList, ObjSize, IndexGenFun, Bucket, 9), @@ -92,6 +116,8 @@ riak_load_tester(Bucket, KeyCount, ObjSize, ProfileList, PM, LC) -> TC3 = load_chunk(Bookie1, CountPerList, ObjSize, IndexGenFun, Bucket, 3), TC7 = load_chunk(Bookie1, CountPerList, ObjSize, IndexGenFun, Bucket, 7), TC10 = load_chunk(Bookie1, CountPerList, ObjSize, IndexGenFun, Bucket, 10), + ok = stop_accounting(LoadAccountant), + {MT0, MP0, MB0} = stop_tracker(LoadMemoryTracker), ct:log( ?INFO, @@ -104,20 +130,23 @@ riak_load_tester(Bucket, KeyCount, ObjSize, ProfileList, PM, LC) -> (TC1 + TC2 + TC3 + TC4 + TC5 + TC6 + TC7 + TC8 + TC9 + TC10) div 1000, ct:log(?INFO, "Total load time ~w ms", [TotalLoadTime]), - {MT0, MP0, MB0} = memory_usage(), - + HeadMemoryTracker = memory_tracking(head, 1000), + HeadAccountant = accounting(head, 2000, ProfileList), TotalHeadTime = random_fetches(head, Bookie1, Bucket, KeyCount, HeadFetches), - - {MT1, MP1, MB1} = memory_usage(), + ok = stop_accounting(HeadAccountant), + {MT1, MP1, MB1} = stop_tracker(HeadMemoryTracker), + GetMemoryTracker = memory_tracking(get, 1000), + GetAccountant = accounting(get, 3000, ProfileList), TotalGetTime = random_fetches(get, Bookie1, Bucket, KeyCount, GetFetches), + ok = stop_accounting(GetAccountant), + {MT2, MP2, MB2} = stop_tracker(GetMemoryTracker), - {MT2, MP2, MB2} = memory_usage(), - + QueryMemoryTracker = memory_tracking(query, 1000), + QueryAccountant = accounting(query, 1000, ProfileList), QuerySize = max(10, IndexCount div 1000), - MiniQuerySize = max(1, IndexCount div 50000), TotalQueryTime = random_queries( Bookie1, @@ -126,6 +155,12 @@ riak_load_tester(Bucket, KeyCount, ObjSize, ProfileList, PM, LC) -> IndexCount, QuerySize, IndexesReturned), + ok = stop_accounting(QueryAccountant), + {MT3a, MP3a, MB3a} = stop_tracker(QueryMemoryTracker), + + MiniQueryMemoryTracker = memory_tracking(mini_query, 1000), + MiniQueryAccountant = accounting(mini_query, 1000, 
ProfileList), + MiniQuerySize = max(1, IndexCount div 50000), TotalMiniQueryTime = random_queries( Bookie1, @@ -133,18 +168,76 @@ riak_load_tester(Bucket, KeyCount, ObjSize, ProfileList, PM, LC) -> 10, IndexCount, MiniQuerySize, - IndexesReturned div 8), + IndexesReturned div ?MINI_QUERY_DIVISOR), + ok = stop_accounting(MiniQueryAccountant), + {MT3b, MP3b, MB3b} = stop_tracker(MiniQueryMemoryTracker), + + RegexQueryMemoryTracker = memory_tracking(regex_query, 1000), + RegexQueryAccountant = accounting(regex_query, 2000, ProfileList), + RegexQueryTime = + random_people_queries( + Bookie1, + Bucket, + IndexesReturned div ?RGEX_QUERY_DIVISOR), + ok = stop_accounting(RegexQueryAccountant), + {MT3c, MP3c, MB3c} = stop_tracker(RegexQueryMemoryTracker), - {MT3, MP3, MB3} = memory_usage(), + GuessMemoryTracker = memory_tracking(guess, 1000), + GuessAccountant = accounting(guess, 1000, ProfileList), + {GuessTime, GuessCount} = + lists:foldl( + fun(_I, {TSAcc, CountAcc}) -> + {TS, Count} = counter(Bookie1, guess), + {TSAcc + TS, CountAcc + Count} + end, + {0, 0}, + lists:seq(1, 60) + ), + ok = stop_accounting(GuessAccountant), + {MT4a, MP4a, MB4a} = stop_tracker(GuessMemoryTracker), - {FullFoldTime, SegFoldTime} = size_estimate_summary(Bookie1), + EstimateMemoryTracker = memory_tracking(estimate, 1000), + EstimateAccountant = accounting(estimate, 1000, ProfileList), + {EstimateTime, EstimateCount} = + lists:foldl( + fun(_I, {TSAcc, CountAcc}) -> + {TS, Count} = counter(Bookie1, estimate), + {TSAcc + TS, CountAcc + Count} + end, + {0, 0}, + lists:seq(1, 40) + ), + ok = stop_accounting(EstimateAccountant), + {MT4b, MP4b, MB4b} = stop_tracker(EstimateMemoryTracker), - {MT4, MP4, MB4} = memory_usage(), + SegFoldTime = (GuessTime + EstimateTime) div 1000, + + FullFoldMemoryTracker = memory_tracking(full, 1000), + FullFoldAccountant = accounting(full, 2000, ProfileList), + {FullFoldTime, FullFoldCount} = + lists:foldl( + fun(_I, {TSAcc, CountAcc}) -> + {TS, Count} = 
counter(Bookie1, full), + {TSAcc + TS, CountAcc + Count} + end, + {0, 0}, + lists:seq(1, 5) + ), + ok = stop_accounting(FullFoldAccountant), + {MT5, MP5, MB5} = stop_tracker(FullFoldMemoryTracker), + ct:log( + info, + "Guess size ~w Estimate size ~w Actual size ~w", + [GuessCount div 60, EstimateCount div 40, FullFoldCount div 10] + ), + + UpdateMemoryTracker = memory_tracking(update, 1000), + UpdateAccountant = accounting(update, 1000, ProfileList), TotalUpdateTime = rotate_chunk(Bookie1, <<"UpdBucket">>, KeyCount div 50, ObjSize), - - {MT5, MP5, MB5} = memory_usage(), + ok = stop_accounting(UpdateAccountant), + {MT6, MP6, MB6} = stop_tracker(UpdateMemoryTracker), DiskSpace = lists:nth(1, string:tokens(os:cmd("du -sh riakLoad"), "\t")), ct:log(?INFO, "Disk space taken by test ~s", [DiskSpace]), @@ -172,8 +265,9 @@ riak_load_tester(Bucket, KeyCount, ObjSize, ProfileList, PM, LC) -> P -> P end, + io:format(user, "~nProfile ~p:~n", [P]), ProFun = profile_fun(P0, ProfileData), - profile_test(Bookie1, ProFun) + profile_test(Bookie1, ProFun, P) end, ProfileList), @@ -183,21 +277,26 @@ riak_load_tester(Bucket, KeyCount, ObjSize, ProfileList, PM, LC) -> {KeyCount, ObjSize, {PM, LC}, TotalLoadTime, TotalHeadTime, TotalGetTime, - TotalQueryTime, TotalMiniQueryTime, FullFoldTime, SegFoldTime, + TotalQueryTime, TotalMiniQueryTime, RegexQueryTime, + FullFoldTime div 1000, SegFoldTime, TotalUpdateTime, DiskSpace, - {(MT0 + MT1 + MT2 + MT3 + MT4 + MT5) div 6000000, - (MP0 + MP1 + MP2 + MP3 + MP4 + MP5) div 6000000, - (MB0 + MB1 + MB2 + MB3 + MB4 + MB5) div 6000000}, + {(MT0 + MT1 + MT2 + MT3a + MT3b + MT3c + MT4a + MT4b + MT5 + MT6) + div 9, + (MP0 + MP1 + MP2 + MP3a + MP3b + MP3c + MP4a + MP4b + MP5 + MP6) + div 9, + (MB0 + MB1 + MB2 + MB3a + MB3b + MB3c + MB4a + MB4b + MB5 + MB6) + div 9}, SSTPids, CDBPids}. 
- -profile_test(Bookie, ProfileFun) -> +profile_test(Bookie, ProfileFun, P) -> {Inker, Pcl, SSTPids, PClerk, CDBPids, IClerk} = get_pids(Bookie), TestPid = self(), profile_app( [TestPid, Bookie, Inker, IClerk, Pcl, PClerk] ++ SSTPids ++ CDBPids, - ProfileFun). + ProfileFun, + P + ). get_pids(Bookie) -> {ok, Inker, Pcl} = leveled_bookie:book_returnactors(Bookie), @@ -211,7 +310,8 @@ output_result( {KeyCount, ObjSize, PressMethod, TotalLoadTime, TotalHeadTime, TotalGetTime, - TotalQueryTime, TotalMiniQueryTime, TotalFullFoldTime, TotalSegFoldTime, + TotalQueryTime, TotalMiniQueryTime, RegexQueryTime, + TotalFullFoldTime, TotalSegFoldTime, TotalUpdateTime, DiskSpace, {TotalMemoryMB, ProcessMemoryMB, BinaryMemoryMB}, @@ -227,6 +327,7 @@ output_result( "TotalGetTime - ~w ms~n" "TotalQueryTime - ~w ms~n" "TotalMiniQueryTime - ~w ms~n" + "TotalRegexQueryTime - ~w ms~n" "TotalFullFoldTime - ~w ms~n" "TotalAAEFoldTime - ~w ms~n" "TotalUpdateTime - ~w ms~n" @@ -236,7 +337,8 @@ output_result( "Closing count of CDB Files - ~w~n", [KeyCount, ObjSize, PressMethod, TotalLoadTime, TotalHeadTime, TotalGetTime, - TotalQueryTime, TotalMiniQueryTime, TotalFullFoldTime, TotalSegFoldTime, + TotalQueryTime, TotalMiniQueryTime, RegexQueryTime, + TotalFullFoldTime, TotalSegFoldTime, TotalUpdateTime, DiskSpace, TotalMemoryMB, ProcessMemoryMB, BinaryMemoryMB, @@ -244,13 +346,12 @@ output_result( ). memory_usage() -> - garbage_collect(), % GC the test process MemoryUsage = erlang:memory(), {element(2, lists:keyfind(total, 1, MemoryUsage)), element(2, lists:keyfind(processes, 1, MemoryUsage)), element(2, lists:keyfind(binary, 1, MemoryUsage))}. -profile_app(Pids, ProfiledFun) -> +profile_app(Pids, ProfiledFun, P) -> eprof:start(), eprof:start_profiling(Pids), @@ -258,40 +359,12 @@ profile_app(Pids, ProfiledFun) -> ProfiledFun(), eprof:stop_profiling(), - eprof:analyze(total), - eprof:stop(). 
- -size_estimate_summary(Bookie) -> - Loops = 10, - ct:log( - ?INFO, - "Size Estimate Tester (SET) started with Loops ~w", - [Loops] - ), - {{TotalGuessTime, TotalEstimateTime, TotalCountTime}, - {TotalEstimateVariance, TotalGuessVariance}} = - lists:foldl( - fun(_I, {{GT, ET, CT}, {AET, AGT}}) -> - {{GT0, ET0, CT0}, {AE0, AG0}} = size_estimate_tester(Bookie), - {{GT + GT0, ET + ET0, CT + CT0}, {AET + AE0, AGT + AG0}} - end, - {{0, 0, 0}, {0, 0}}, - lists:seq(1, Loops) - ), - ct:log( - ?INFO, - "SET: MeanGuess ~w ms MeanEstimate ~w ms MeanCount ~w ms", - [TotalGuessTime div 10000, - TotalEstimateTime div 10000, - TotalCountTime div 10000] - ), - ct:log( - ?INFO, - "Mean variance in Estimate ~w Guess ~w", - [TotalEstimateVariance div Loops, TotalGuessVariance div Loops] - ), - %% Assume that segment-list folds are 10 * as common as all folds - {TotalCountTime div 1000, (TotalGuessTime + TotalEstimateTime) div 1000}. + eprof:log(atom_to_list(P) ++ ".log"), + eprof:analyze(total, [{filter, [{time, 150000}]}]), + eprof:stop(), + {ok, Analysis} = file:read_file(atom_to_list(P) ++ ".log"), + io:format(user, "~n~s~n", [Analysis]) + . rotate_chunk(Bookie, Bucket, KeyCount, ObjSize) -> @@ -311,15 +384,6 @@ rotate_chunk(Bookie, Bucket, KeyCount, ObjSize) -> end), TC div 1000. -load_chunk(Bookie, CountPerList, ObjSize, IndexGenFun, Bucket, Chunk) -> - ct:log(?INFO, "Generating and loading ObjList ~w", [Chunk]), - ObjList = - generate_chunk(CountPerList, ObjSize, IndexGenFun, Bucket, Chunk), - {TC, ok} = timer:tc(fun() -> testutil:riakload(Bookie, ObjList) end), - garbage_collect(), - timer:sleep(2000), - TC. - generate_chunk(CountPerList, ObjSize, IndexGenFun, Bucket, Chunk) -> testutil:generate_objects( CountPerList, @@ -329,31 +393,60 @@ generate_chunk(CountPerList, ObjSize, IndexGenFun, Bucket, Chunk) -> Bucket ). 
-size_estimate_tester(Bookie) -> - %% Data size test - calculate data size, then estimate data size - {CountTS, Count} = counter(Bookie, full), - {CountTSEstimate, CountEstimate} = counter(Bookie, estimate), - {CountTSGuess, CountGuess} = counter(Bookie, guess), - {GuessTolerance, EstimateTolerance} = - case Count of - C when C < 500000 -> - {0.20, 0.15}; - C when C < 1000000 -> - {0.12, 0.1}; - C when C < 2000000 -> - {0.1, 0.08}; - _C -> - {0.08, 0.05} - end, +load_chunk(Bookie, CountPerList, ObjSize, IndexGenFun, Bucket, Chunk) -> + ct:log(?INFO, "Generating and loading ObjList ~w", [Chunk]), + time_load_chunk( + Bookie, + {Bucket, base64:encode(leveled_rand:rand_bytes(ObjSize)), IndexGenFun(Chunk)}, + (Chunk - 1) * CountPerList + 1, + Chunk * CountPerList, + 0, + 0 + ). - true = - ((CountGuess / Count) > (1.0 - GuessTolerance)) - and ((CountGuess / Count) < (1.0 + GuessTolerance)), - true = - ((CountEstimate / Count) > (1.0 - EstimateTolerance)) - and ((CountEstimate / Count) < (1.0 + EstimateTolerance)), - {{CountTSGuess, CountTSEstimate, CountTS}, - {abs(CountEstimate - Count), abs(CountGuess - Count)}}. +time_load_chunk( + _Bookie, _ObjDetails, KeyNumber, TopKey, TotalTime, PC) + when KeyNumber > TopKey -> + garbage_collect(), + timer:sleep(2000), + ct:log( + ?INFO, + "Count of ~w pauses during chunk load", + [PC] + ), + TotalTime; +time_load_chunk( + Bookie, {Bucket, Value, IndexGen}, KeyNumber, TopKey, TotalTime, PC) -> + ThisProcess = self(), + spawn( + fun() -> + {RiakObj, IndexSpecs} = + testutil:set_object( + Bucket, testutil:fixed_bin_key(KeyNumber), Value, IndexGen, []), + {TC, R} = + timer:tc( + testutil, book_riakput, [Bookie, RiakObj, IndexSpecs] + ), + case R of + ok -> + ThisProcess! {TC, 0}; + pause -> + timer:sleep(40), + ThisProcess ! {TC + 40000, 1} + end + end + ), + receive + {PutTime, Pause} -> + time_load_chunk( + Bookie, + {Bucket, Value, IndexGen}, + KeyNumber + 1, + TopKey, + TotalTime + PutTime, + PC + Pause + ) + end. 
counter(Bookie, full) -> {async, DataSizeCounter} = @@ -471,6 +564,42 @@ random_queries(Bookie, Bucket, IDs, IdxCnt, MaxRange, IndexesReturned) -> TC div 1000. +random_people_queries(Bookie, Bucket, IndexesReturned) -> + SeventiesWillowRegex = + "[^\\|]*\\|197[0-9]{5}\\|[^\\|]*\\|" + "[^\\|]*#Willow[^\\|]*\\|[^\\|]*#LS[^\\|]*", + %% born in the 70s with Willow as a given name + QueryFun = + fun() -> + Surname = get_random_surname(), + Range = + {?PEOPLE_INDEX, + Surname, + <> + }, + {ok, TermRegex} = + re:compile(SeventiesWillowRegex), + FoldKeysFun = fun(_B, _K, Cnt) -> Cnt + 1 end, + {async, R} = + leveled_bookie:book_indexfold( + Bookie, + {Bucket, <<>>}, + {FoldKeysFun, 0}, + Range, + {true, TermRegex}), + R() + end, + + {TC, {QC, EF}} = + timer:tc(fun() -> run_queries(QueryFun, 0, 0, IndexesReturned) end), + ct:log( + ?INFO, + "Fetch of ~w index entries by regex in ~w queries took ~w ms", + [EF, QC, TC div 1000] + ), + TC div 1000. + + run_queries(_QueryFun, QueryCount, EntriesFound, TargetEntries) when EntriesFound >= TargetEntries -> {QueryCount, EntriesFound}; @@ -486,7 +615,8 @@ profile_fun( {Bookie, Bucket, _KeyCount, _ObjSize, IndexCount, IndexesReturned}) -> fun() -> random_queries( - Bookie, Bucket, 10, IndexCount, QuerySize, IndexesReturned div 8) + Bookie, Bucket, 10, IndexCount, QuerySize, + IndexesReturned div ?MINI_QUERY_DIVISOR) end; profile_fun( {query, QuerySize}, @@ -495,6 +625,13 @@ profile_fun( random_queries( Bookie, Bucket, 10, IndexCount, QuerySize, IndexesReturned) end; +profile_fun( + regex_query, + {Bookie, Bucket, _KeyCount, _ObjSize, _IndexCount, IndexesReturned}) -> + fun() -> + random_people_queries( + Bookie, Bucket, IndexesReturned div ?RGEX_QUERY_DIVISOR) + end; profile_fun( {head, HeadFetches}, {Bookie, Bucket, KeyCount, _ObjSize, _IndexCount, _IndexesReturned}) -> @@ -524,11 +661,230 @@ profile_fun( profile_fun( CounterFold, {Bookie, _Bucket, _KeyCount, _ObjSize, _IndexCount, _IndexesReturned}) -> + Runs = + case CounterFold 
of + full -> + 20; + estimate -> + 40; + guess -> + 100 + end, fun() -> lists:foreach( fun(_I) -> _ = counter(Bookie, CounterFold) end, - lists:seq(1, 10) + lists:seq(1, Runs) ) end. + +random_people_index() -> + io_lib:format( + "~s|~s|~s|#~s#~s#~s|#~s#~s#~s", + [get_random_surname(), + get_random_dob(), + get_random_dod(), + get_random_givenname(), get_random_givenname(), get_random_givenname(), + get_random_postcode(), get_random_postcode(), get_random_postcode() + ] + ). + +get_random_surname() -> + lists:nth( + rand:uniform(100), + [<<"Smith">>, <<"Jones">>, <<"Taylor">>, <<"Brown">>, <<"Williams">>, + <<"Wilson">>, <<"Johnson">>, <<"Davies">>, <<"Patel">>, <<"Robinson">>, + <<"Wright">>, <<"Thompson">>, <<"Evans">>, <<"Walker">>, <<"White">>, + <<"Roberts">>, <<"Green">>, <<"Hall">>, <<"Thomas">>, <<"Clarke">>, + <<"Jackson">>, <<"Wood">>, <<"Harris">>, <<"Edwards">>, <<"Turner">>, + <<"Martin">>, <<"Cooper">>, <<"Hill">>, <<"Ward">>, <<"Hughes">>, + <<"Moore">>, <<"Clark">>, <<"King">>, <<"Harrison">>, <<"Lewis">>, + <<"Baker">>, <<"Lee">>, <<"Allen">>, <<"Morris">>, <<"Khan">>, + <<"Scott">>, <<"Watson">>, <<"Davis">>, <<"Parker">>, <<"James">>, + <<"Bennett">>, <<"Young">>, <<"Phillips">>, <<"Richardson">>, <<"Mitchell">>, + <<"Bailey">>, <<"Carter">>, <<"Cook">>, <<"Singh">>, <<"Shaw">>, + <<"Bell">>, <<"Collins">>, <<"Morgan">>, <<"Kelly">>, <<"Begum">>, + <<"Miller">>, <<"Cox">>, <<"Hussain">>, <<"Marshall">>, <<"Simpson">>, + <<"Price">>, <<"Anderson">>, <<"Adams">>, <<"Wilkinson">>, <<"Ali">>, + <<"Ahmed">>, <<"Foster">>, <<"Ellis">>, <<"Murphy">>, <<"Chapman">>, + <<"Mason">>, <<"Gray">>, <<"Richards">>, <<"Webb">>, <<"Griffiths">>, + <<"Hunt">>, <<"Palmer">>, <<"Campbell">>, <<"Holmes">>, <<"Mills">>, + <<"Rogers">>, <<"Barnes">>, <<"Knight">>, <<"Matthews">>, <<"Barker">>, + <<"Powell">>, <<"Stevens">>, <<"Kaur">>, <<"Fisher">>, <<"Butler">>, + <<"Dixon">>, <<"Russell">>, <<"Harvey">>, <<"Pearson">>, <<"Graham">>] + ). 
+ +get_random_givenname() -> + lists:nth( + rand:uniform(20), + [<<"Noah">>, <<"Oliver">>, <<"George">>, <<"Arthur">>, <<"Muhammad">>, + <<"Leo">>, <<"Harry">>, <<"Oscar">> , <<"Archie">>, <<"Henry">>, + <<"Olivia">>, <<"Amelia">>, <<"Isla">>, <<"Ava">>, <<"Ivy">>, + <<"Freya">>, <<"Lily">>, <<"Florence">>, <<"Mia">>, <<"Willow">> + ]). + +get_random_dob() -> + io_lib:format( + "~4..0B~2..0B~2..0B", + [1900 + rand:uniform(99), rand:uniform(12), rand:uniform(28)] + ). + +get_random_dod() -> + io_lib:format( + "~4..0B~2..0B~2..0B", + [2000 + rand:uniform(20), rand:uniform(12), rand:uniform(28)] + ). + +get_random_postcode() -> + io_lib:format( + "LS~w ~wXX", [rand:uniform(26), rand:uniform(9)] + ). + + +memory_tracking(Phase, Timeout) -> + spawn( + fun() -> + memory_tracking(Phase, Timeout, {0, 0, 0}, 0) + end + ). + +memory_tracking(Phase, Timeout, {TAcc, PAcc, BAcc}, Loops) -> + receive + {stop, Caller} -> + {T, P, B} = memory_usage(), + TAvg = (T + TAcc) div ((Loops + 1) * 1000000), + PAvg = (P + PAcc) div ((Loops + 1) * 1000000), + BAvg = (B + BAcc) div ((Loops + 1) * 1000000), + print_memory_stats(Phase, TAvg, PAvg, BAvg), + Caller ! {TAvg, PAvg, BAvg} + after Timeout -> + {T, P, B} = memory_usage(), + memory_tracking( + Phase, Timeout, {TAcc + T, PAcc + P, BAcc + B}, Loops + 1) + end. + + +-if(?performance == riak_ctperf). +print_memory_stats(_Phase, _TAvg, _PAvg, _BAvg) -> + ok. +-else. +print_memory_stats(Phase, TAvg, PAvg, BAvg) -> + io:format( + user, + "~nFor ~w memory stats: total ~wMB process ~wMB binary ~wMB~n", + [Phase, TAvg, PAvg, BAvg] + ). +-endif. + +dummy_accountant() -> + spawn(fun() -> receive {stop, Caller} -> Caller ! ok end end). + +stop_accounting(Accountant) -> + Accountant ! {stop, self()}, + receive ok -> ok end. + +stop_tracker(Tracker) -> + garbage_collect(), + % Garbage collect the test process, before getting the memory stats + Tracker ! {stop, self()}, + receive MemStats -> MemStats end. + +-if(?ACCOUNTING). 
+ +-define(ACCT_TYPES, [scheduler, dirty_io_scheduler, dirty_cpu_scheduler, aux]). + +accounting(Phase, Timeout, ProfileList) -> + case lists:member(Phase, ProfileList) of + true -> + ZeroCounters = + #{ + emulator => 0, + aux => 0, + check_io => 0, + gc => 0, + other => 0 + }, + InitCounters = + lists:map(fun(T) -> {T, ZeroCounters} end, ?ACCT_TYPES), + spawn( + fun() -> + accounting(Phase, Timeout, maps:from_list(InitCounters), 0) + end + ); + false -> + dummy_accountant() + end. + +accounting(Phase, Timeout, Counters, Loops) -> + receive + {stop, Caller} -> + io:format( + user, + "~n~nStats for Phase ~p after loops ~p:~n", + [Phase, Loops] + ), + lists:foreach( + fun(S) -> + scheduler_output(S, maps:get(S, Counters)) + end, + ?ACCT_TYPES + ), + Caller ! ok + after Timeout -> + msacc:start(Timeout div 5), + UpdCounters = + lists:foldl( + fun(StatMap, CountersAcc) -> + Type = maps:get(type, StatMap), + case lists:member(Type, ?ACCT_TYPES) of + true -> + TypeAcc = + maps:intersect_with( + fun(_K, V1, V2) -> V1 + V2 end, + maps:get(counters, StatMap), + maps:get(Type, CountersAcc) + ), + maps:update(Type, TypeAcc, CountersAcc); + false -> + CountersAcc + end + end, + Counters, + msacc:stats() + ), + accounting(Phase, Timeout, UpdCounters, Loops + 1) + end. + +scheduler_output(Scheduler, CounterMap) -> + Total = + maps:get(emulator, CounterMap) + + maps:get(aux, CounterMap) + + maps:get(check_io, CounterMap) + + maps:get(gc, CounterMap) + + maps:get(other, CounterMap), + GC = maps:get(gc, CounterMap), + GCperc = case Total > 0 of true -> GC/Total; false -> 0.0 end, + io:format( + user, + "~nFor ~w:~n" + "emulator=~w, aux=~w, check_io=~w, gc=~w, other=~w~n" + "total ~w~n" + "percentage_gc ~.2f %~n", + [Scheduler, + maps:get(emulator, CounterMap), + maps:get(aux, CounterMap), + maps:get(check_io, CounterMap), + GC, + maps:get(other, CounterMap), + Total, + GCperc + ] + ). + +-else. + +accounting(_Phase, _Timeout, _ProfileList) -> + dummy_accountant(). + +-endif. 
\ No newline at end of file diff --git a/test/end_to_end/recovery_SUITE.erl b/test/end_to_end/recovery_SUITE.erl index 222a7472..8a74ac16 100644 --- a/test/end_to_end/recovery_SUITE.erl +++ b/test/end_to_end/recovery_SUITE.erl @@ -483,12 +483,14 @@ rotate_wipe_compact(Strategy1, Strategy2) -> {ok, Book3} = leveled_bookie:book_start(BookOptsAlt), {KSpcL2, _V2} = testutil:put_indexed_objects(Book3, "AltBucket6", 3000), - Q2 = fun(RT) -> {index_query, - "AltBucket6", - {fun testutil:foldkeysfun/3, []}, - {"idx1_bin", "#", "|"}, - {RT, undefined}} - end, + Q2 = + fun(RT) -> + {index_query, + "AltBucket6", + {fun testutil:foldkeysfun/3, []}, + {<<"idx1_bin">>, <<"#">>, <<"|">>}, + {RT, undefined}} + end, {async, KFolder2A} = leveled_bookie:book_returnfolder(Book3, Q2(false)), KeyList2A = lists:usort(KFolder2A()), true = length(KeyList2A) == 3000, @@ -629,12 +631,14 @@ recovr_strategy(_Config) -> true = VCH == VCG end, lists:nthtail(6400, AllSpcL)), - Q = fun(RT) -> {index_query, - "Bucket6", - {fun testutil:foldkeysfun/3, []}, - {"idx1_bin", "#", "|"}, - {RT, undefined}} - end, + Q = + fun(RT) -> + {index_query, + "Bucket6", + {fun testutil:foldkeysfun/3, []}, + {<<"idx1_bin">>, <<"#">>, <<"|">>}, + {RT, undefined}} + end, {async, TFolder} = leveled_bookie:book_returnfolder(Book1, Q(true)), KeyTermList = TFolder(), {async, KFolder} = leveled_bookie:book_returnfolder(Book1, Q(false)), @@ -660,12 +664,14 @@ recovr_strategy(_Config) -> KeyList2 = lists:usort(KFolder2()), true = length(KeyList2) == 6400, - Q2 = fun(RT) -> {index_query, - "AltBucket6", - {fun testutil:foldkeysfun/3, []}, - {"idx1_bin", "#", "|"}, - {RT, undefined}} - end, + Q2 = + fun(RT) -> + {index_query, + "AltBucket6", + {fun testutil:foldkeysfun/3, []}, + {<<"idx1_bin">>, <<"#">>, <<"|">>}, + {RT, undefined}} + end, {async, KFolder2A} = leveled_bookie:book_returnfolder(Book2, Q2(false)), KeyList2A = lists:usort(KFolder2A()), true = length(KeyList2A) == 3000, diff --git a/test/end_to_end/testutil.erl 
b/test/end_to_end/testutil.erl index 2c8dfc7e..f003be3e 100644 --- a/test/end_to_end/testutil.erl +++ b/test/end_to_end/testutil.erl @@ -68,7 +68,7 @@ compact_and_wait/1]). -define(RETURN_TERMS, {true, undefined}). --define(SLOWOFFER_DELAY, 10). +-define(SLOWOFFER_DELAY, 40). -define(V1_VERS, 1). -define(MAGIC, 53). % riak_kv -> riak_object -define(MD_VTAG, <<"X-Riak-VTag">>). @@ -691,21 +691,24 @@ load_objects(ChunkSize, GenList, Bookie, TestObject, Generator, SubListL) -> get_randomindexes_generator(Count) -> - Generator = fun() -> - lists:map(fun(X) -> - {add, - "idx" ++ integer_to_list(X) ++ "_bin", - get_randomdate() ++ get_randomname()} end, - lists:seq(1, Count)) + Generator = + fun() -> + lists:map( + fun(X) -> + {add, + list_to_binary("idx" ++ integer_to_list(X) ++ "_bin"), + list_to_binary(get_randomdate() ++ get_randomname())} + end, + lists:seq(1, Count)) end, Generator. name_list() -> [{1, "Sophia"}, {2, "Emma"}, {3, "Olivia"}, {4, "Ava"}, - {5, "Isabella"}, {6, "Mia"}, {7, "Zoe"}, {8, "Lily"}, - {9, "Emily"}, {10, "Madelyn"}, {11, "Madison"}, {12, "Chloe"}, - {13, "Charlotte"}, {14, "Aubrey"}, {15, "Avery"}, - {16, "Abigail"}]. + {5, "Isabella"}, {6, "Mia"}, {7, "Zoe"}, {8, "Lily"}, + {9, "Emily"}, {10, "Madelyn"}, {11, "Madison"}, {12, "Chloe"}, + {13, "Charlotte"}, {14, "Aubrey"}, {15, "Avery"}, + {16, "Abigail"}]. 
get_randomname() -> NameList = name_list(), @@ -738,7 +741,7 @@ check_indexed_objects(Book, B, KSpecL, V) -> fun({K, Spc}) -> {ok, O} = book_riakget(Book, B, K), V = testutil:get_value(O), - {add, "idx1_bin", IdxVal} = lists:keyfind(add, 1, Spc), + {add, <<"idx1_bin">>, IdxVal} = lists:keyfind(add, 1, Spc), {IdxVal, K} end, KSpecL), @@ -749,7 +752,7 @@ check_indexed_objects(Book, B, KSpecL, V) -> {index_query, B, {fun foldkeysfun/3, []}, - {"idx1_bin", "0", "|"}, + {<<"idx1_bin">>, <<"0">>, <<"|">>}, ?RETURN_TERMS}), SW = os:timestamp(), {async, Fldr} = R, @@ -796,11 +799,12 @@ put_altered_indexed_objects(Book, Bucket, KSpecL, RemoveOld2i) -> put_altered_indexed_objects(Book, Bucket, KSpecL, RemoveOld2i, V). put_altered_indexed_objects(Book, Bucket, KSpecL, RemoveOld2i, V) -> + SW = os:timestamp(), IndexGen = get_randomindexes_generator(1), - + ThisProcess = self(), FindAdditionFun = fun(SpcItem) -> element(1, SpcItem) == add end, MapFun = - fun({K, Spc}) -> + fun({K, Spc}, Acc) -> OldSpecs = lists:filter(FindAdditionFun, Spc), {RemoveSpc, AddSpc} = case RemoveOld2i of @@ -809,26 +813,45 @@ put_altered_indexed_objects(Book, Bucket, KSpecL, RemoveOld2i, V) -> false -> {[], OldSpecs} end, - {O, DeltaSpecs} = - set_object(Bucket, K, V, - IndexGen, RemoveSpc, AddSpc), - % DeltaSpecs should be new indexes added, and any old indexes which - % have been removed by this change where RemoveOld2i is true. - % - % The actual indexes within the object should reflect any history - % of indexes i.e. when RemoveOld2i is false. - % - % The [{Key, SpecL}] returned should accrue additions over loops if - % RemoveOld2i is false - case book_riakput(Book, O, DeltaSpecs) of - ok -> ok; - pause -> timer:sleep(?SLOWOFFER_DELAY) - end, + PutFun = + fun() -> + {O, DeltaSpecs} = + set_object( + Bucket, K, V, IndexGen, RemoveSpc, AddSpc), + % DeltaSpecs should be new indexes added, and any old + % indexes which have been removed by this change where + % RemoveOld2i is true. 
+ % + % The actual indexes within the object should reflect any + % history of indexes i.e. when RemoveOld2i is false. + % + % The [{Key, SpecL}] returned should accrue additions over + % loops if RemoveOld2i is false + R = + case book_riakput(Book, O, DeltaSpecs) of + ok -> + ok; + pause -> + timer:sleep(?SLOWOFFER_DELAY), + pause + end, + ThisProcess ! {R, DeltaSpecs} + end, + spawn(PutFun), + AccOut = + receive + {ok, NewSpecs} -> Acc; + {pause, NewSpecs} -> Acc + 1 + end, % Note that order in the SpecL is important, as % check_indexed_objects, needs to find the latest item added - {K, DeltaSpecs ++ AddSpc} + {{K, NewSpecs ++ AddSpc}, AccOut} end, - RplKSpecL = lists:map(MapFun, KSpecL), + {RplKSpecL, Pauses} = lists:mapfoldl(MapFun, 0, KSpecL), + io:format( + "Altering ~w objects took ~w ms with ~w pauses~n", + [length(KSpecL), timer:now_diff(os:timestamp(), SW) div 1000, Pauses] + ), {RplKSpecL, V}. rotating_object_check(RootPath, B, NumberOfObjects) -> diff --git a/test/end_to_end/tictac_SUITE.erl b/test/end_to_end/tictac_SUITE.erl index 16ea215d..37b5e1af 100644 --- a/test/end_to_end/tictac_SUITE.erl +++ b/test/end_to_end/tictac_SUITE.erl @@ -378,10 +378,13 @@ index_compare(_Config) -> GetTicTacTreeFun = fun(X, Bookie) -> SW = os:timestamp(), - ST = "!", - ET = "|", + ST = <<"!">>, + ET = <<"|">>, Q = {tictactree_idx, - {BucketBin, "idx" ++ integer_to_list(X) ++ "_bin", ST, ET}, + {BucketBin, + list_to_binary("idx" ++ integer_to_list(X) ++ "_bin"), + ST, + ET}, TreeSize, fun(_B, _K) -> accumulate end}, {async, Folder} = leveled_bookie:book_returnfolder(Bookie, Q), @@ -442,12 +445,14 @@ index_compare(_Config) -> true = DL2_0 == [], true = length(DL2_1) > 100, - IdxSpc = {add, "idx2_bin", "zz999"}, - {TestObj, TestSpc} = testutil:generate_testobject(BucketBin, - term_to_binary("K9.Z"), - "Value1", - [IdxSpc], - [{"MDK1", "MDV1"}]), + IdxSpc = {add, <<"idx2_bin">>, <<"zz999">>}, + {TestObj, TestSpc} = + testutil:generate_testobject( + BucketBin, + 
term_to_binary("K9.Z"), + "Value1", + [IdxSpc], + [{"MDK1", "MDV1"}]), ok = testutil:book_riakput(Book2C, TestObj, TestSpc), testutil:check_forobject(Book2C, TestObj), @@ -457,25 +462,30 @@ index_compare(_Config) -> TicTacTree3_P3 = GetTicTacTreeFun(2, Book2D), % Merge the tree across the partitions - TicTacTree3_Joined = lists:foldl(fun leveled_tictac:merge_trees/2, - TicTacTree3_P1, - [TicTacTree3_P2, TicTacTree3_P3]), + TicTacTree3_Joined = + lists:foldl( + fun leveled_tictac:merge_trees/2, + TicTacTree3_P1, + [TicTacTree3_P2, TicTacTree3_P3]), % Find all keys index, and then just the last key IdxQ1 = {index_query, BucketBin, {fun testutil:foldkeysfun/3, []}, - {"idx2_bin", "zz", "zz|"}, + {<<"idx2_bin">>, <<"zz">>, <<"zz|">>}, {true, undefined}}, {async, IdxFolder1} = leveled_bookie:book_returnfolder(Book2C, IdxQ1), true = IdxFolder1() >= 1, - DL_3to2B = leveled_tictac:find_dirtyleaves(TicTacTree2_P1, - TicTacTree3_P1), - DL_3to2C = leveled_tictac:find_dirtyleaves(TicTacTree2_P2, - TicTacTree3_P2), - DL_3to2D = leveled_tictac:find_dirtyleaves(TicTacTree2_P3, - TicTacTree3_P3), + DL_3to2B = + leveled_tictac:find_dirtyleaves( + TicTacTree2_P1, TicTacTree3_P1), + DL_3to2C = + leveled_tictac:find_dirtyleaves( + TicTacTree2_P2, TicTacTree3_P2), + DL_3to2D = + leveled_tictac:find_dirtyleaves( + TicTacTree2_P3, TicTacTree3_P3), io:format("Individual tree comparison found dirty leaves of ~w ~w ~w~n", [DL_3to2B, DL_3to2C, DL_3to2D]), @@ -509,7 +519,7 @@ index_compare(_Config) -> MismatchQ = {index_query, BucketBin, {FoldKeysIndexQFun, []}, - {"idx2_bin", "!", "|"}, + {<<"idx2_bin">>, <<"!">>, <<"|">>}, {true, undefined}}, {async, MMFldr_2A} = leveled_bookie:book_returnfolder(Book2A, MismatchQ), {async, MMFldr_2B} = leveled_bookie:book_returnfolder(Book2B, MismatchQ), @@ -531,7 +541,7 @@ index_compare(_Config) -> io:format("Differences between lists ~w~n", [Diffs]), % The actual difference is discovered - true = lists:member({"zz999", term_to_binary("K9.Z")}, Diffs), + 
true = lists:member({<<"zz999">>, term_to_binary("K9.Z")}, Diffs), % Without discovering too many others true = length(Diffs) < 20,