From d45356a4f748ce06cd62fb0221f50658ccfd8588 Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Mon, 15 Jul 2024 20:49:21 +0100 Subject: [PATCH 1/2] Extend perf_SUITE (#434) * Extend perf_SUITE This is v6 of the perf_SUITE tests. The test adds a complex index entry to every object, and then adds a new test phase to test regex queries. There are three profiles added so the full, mini and profiling versions of perf_SUITE can be run without having to edit the file itself: e.g. ./rebar3 as perf_mini do ct --suite=test/end_to_end/perf_SUITE When testing as `perf_prof` summarised versions of the eprof results are now printed to screen. The volume of keys within the full test suite has been dropped ... just to make life easier so that test run times are not excessively increase by the new features. * Load chunk in spawned processes Assume to make the job of gs easier - name makes a massive difference to load time in OTP 24. * Correctly account for pause alos try and improve test stability by increasing pause * Add microstate accounting to profile * Add memory tracking during test phases Identify and log out memory usage by test phase * Use macros instead (#437) * Don't print memory to screen in standard ct test --------- Co-authored-by: Thomas Arts --- rebar.config | 5 +- test/end_to_end/iterator_SUITE.erl | 380 ++++++++++---------- test/end_to_end/perf_SUITE.erl | 554 +++++++++++++++++++++++------ test/end_to_end/recovery_SUITE.erl | 42 ++- test/end_to_end/testutil.erl | 87 +++-- test/end_to_end/tictac_SUITE.erl | 52 +-- 6 files changed, 749 insertions(+), 371 deletions(-) diff --git a/rebar.config b/rebar.config index 43c930b3..f0b17482 100644 --- a/rebar.config +++ b/rebar.config @@ -22,7 +22,10 @@ {plugins, [rebar_eqc]} ]}, {test, [{extra_src_dirs, ["test/end_to_end", "test/property"]} - ]} + ]}, + {perf_full, [{erl_opts, [{d, performance, riak_fullperf}]}]}, + {perf_mini, [{erl_opts, [{d, performance, riak_miniperf}]}]}, + {perf_prof, [{erl_opts, [{d, performance, riak_profileperf}]}]} ]}. {deps, [ diff --git a/test/end_to_end/iterator_SUITE.erl b/test/end_to_end/iterator_SUITE.erl index 217d209f..ef5efdf0 100644 --- a/test/end_to_end/iterator_SUITE.erl +++ b/test/end_to_end/iterator_SUITE.erl @@ -213,11 +213,12 @@ breaking_folds(_Config) -> % Find all keys index, and then same again but stop at a midpoint using a % throw {async, IdxFolder} = - leveled_bookie:book_indexfold(Bookie1, - list_to_binary("Bucket"), - {fun testutil:foldkeysfun/3, []}, - {"idx1_bin", "#", "|"}, - {true, undefined}), + leveled_bookie:book_indexfold( + Bookie1, + list_to_binary("Bucket"), + {fun testutil:foldkeysfun/3, []}, + {<<"idx1_bin">>, <<"#">>, <<"|">>}, + {true, undefined}), KeyList1 = lists:reverse(IdxFolder()), io:format("Index fold with result size ~w~n", [length(KeyList1)]), true = KeyCount == length(KeyList1), @@ -235,11 +236,12 @@ breaking_folds(_Config) -> end end, {async, IdxFolderToMidK} = - leveled_bookie:book_indexfold(Bookie1, - list_to_binary("Bucket"), - {FoldKeyThrowFun, []}, - {"idx1_bin", "#", "|"}, - {true, undefined}), + leveled_bookie:book_indexfold( + Bookie1, + list_to_binary("Bucket"), + {FoldKeyThrowFun, []}, + {<<"idx1_bin">>, <<"#">>, <<"|">>}, + {true, undefined}), CatchingFold = fun(AsyncFolder) -> try @@ -261,10 +263,8 @@ breaking_folds(_Config) -> [{K, Size}|Acc] end, {async, HeadFolder} = - leveled_bookie:book_headfold(Bookie1, - ?RIAK_TAG, - {HeadFoldFun, []}, - true, true, false), + leveled_bookie:book_headfold( + Bookie1, ?RIAK_TAG, {HeadFoldFun, []}, true, true, false), KeySizeList1 = lists:reverse(HeadFolder()), io:format("Head fold with result size ~w~n", [length(KeySizeList1)]), true = KeyCount == length(KeySizeList1), @@ -472,11 +472,9 @@ small_load_with2i(_Config) -> testutil:check_forobject(Bookie1, TestObject), ObjectGen = testutil:get_compressiblevalue_andinteger(), IndexGen = testutil:get_randomindexes_generator(8), - ObjL1 = testutil:generate_objects(10000, - uuid, - [], - ObjectGen, - IndexGen), + ObjL1 = + testutil:generate_objects( + 10000, uuid, [], ObjectGen, IndexGen), testutil:riakload(Bookie1, ObjL1), ChkList1 = lists:sublist(lists:sort(ObjL1), 100), testutil:check_forlist(Bookie1, ChkList1), @@ -486,7 +484,7 @@ small_load_with2i(_Config) -> IdxQ1 = {index_query, "Bucket", {fun testutil:foldkeysfun/3, []}, - {"idx1_bin", "#", "|"}, + {<<"idx1_bin">>, <<"#">>, <<"|">>}, {true, undefined}}, {async, IdxFolder} = leveled_bookie:book_returnfolder(Bookie1, IdxQ1), KeyList1 = lists:usort(IdxFolder()), @@ -495,7 +493,7 @@ small_load_with2i(_Config) -> IdxQ2 = {index_query, {"Bucket", LastKey}, {fun testutil:foldkeysfun/3, []}, - {"idx1_bin", LastTerm, "|"}, + {<<"idx1_bin">>, LastTerm, <<"|">>}, {false, undefined}}, {async, IdxFolderLK} = leveled_bookie:book_returnfolder(Bookie1, IdxQ2), KeyList2 = lists:usort(IdxFolderLK()), @@ -530,12 +528,14 @@ small_load_with2i(_Config) -> {FoldObjectsFun, []}, false), KeyHashList2 = HTreeF2(), - {async, HTreeF3} = leveled_bookie:book_objectfold(Bookie1, - ?RIAK_TAG, - "Bucket", - {"idx1_bin", "#", "|"}, - {FoldObjectsFun, []}, - false), + {async, HTreeF3} = + leveled_bookie:book_objectfold( + Bookie1, + ?RIAK_TAG, + "Bucket", + {<<"idx1_bin">>, <<"#">>, <<"|">>}, + {FoldObjectsFun, []}, + false), KeyHashList3 = HTreeF3(), true = 9901 == length(KeyHashList1), % also includes the test object true = 9900 == length(KeyHashList2), @@ -585,96 +585,86 @@ small_load_with2i(_Config) -> query_count(_Config) -> RootPath = testutil:reset_filestructure(), - {ok, Book1} = leveled_bookie:book_start(RootPath, - 2000, - 50000000, - testutil:sync_strategy()), + {ok, Book1} = + leveled_bookie:book_start( + RootPath, 2000, 50000000, testutil:sync_strategy()), BucketBin = list_to_binary("Bucket"), - {TestObject, TestSpec} = testutil:generate_testobject(BucketBin, - term_to_binary("Key1"), - "Value1", - [], - [{"MDK1", "MDV1"}]), + {TestObject, TestSpec} = + testutil:generate_testobject( + BucketBin, term_to_binary("Key1"), "Value1", [], [{"MDK1", "MDV1"}]), ok = testutil:book_riakput(Book1, TestObject, TestSpec), testutil:check_forobject(Book1, TestObject), testutil:check_formissingobject(Book1, "Bucket1", "Key2"), testutil:check_forobject(Book1, TestObject), - lists:foreach(fun(_X) -> - V = testutil:get_compressiblevalue(), - Indexes = testutil:get_randomindexes_generator(8), - SW = os:timestamp(), - ObjL1 = testutil:generate_objects(10000, - binary_uuid, - [], - V, - Indexes), - testutil:riakload(Book1, ObjL1), - io:format("Put of 10000 objects with 8 index entries " - ++ - "each completed in ~w microseconds~n", - [timer:now_diff(os:timestamp(), SW)]) - end, - lists:seq(1, 8)), + lists:foreach( + fun(_X) -> + V = testutil:get_compressiblevalue(), + Indexes = testutil:get_randomindexes_generator(8), + SW = os:timestamp(), + ObjL1 = testutil:generate_objects(10000, + binary_uuid, + [], + V, + Indexes), + testutil:riakload(Book1, ObjL1), + io:format( + "Put of 10000 objects with 8 index entries " + "each completed in ~w microseconds~n", + [timer:now_diff(os:timestamp(), SW)]) + end, + lists:seq(1, 8)), testutil:check_forobject(Book1, TestObject), - Total = lists:foldl(fun(X, Acc) -> - IdxF = "idx" ++ integer_to_list(X) ++ "_bin", - T = count_termsonindex(BucketBin, - IdxF, - Book1, - ?KEY_ONLY), - io:format("~w terms found on index ~s~n", - [T, IdxF]), - Acc + T - end, - 0, - lists:seq(1, 8)), - ok = case Total of - 640000 -> - ok + Total = + lists:foldl( + fun(X, Acc) -> + IdxF = "idx" ++ integer_to_list(X) ++ "_bin", + T = + count_termsonindex( + BucketBin, list_to_binary(IdxF), Book1, ?KEY_ONLY), + io:format("~w terms found on index ~s~n", [T, IdxF]), + Acc + T end, - Index1Count = count_termsonindex(BucketBin, - "idx1_bin", - Book1, - ?KEY_ONLY), + 0, + lists:seq(1, 8)), + true = Total == 640000, + Index1Count = + count_termsonindex( + BucketBin, <<"idx1_bin">>, Book1, ?KEY_ONLY), ok = leveled_bookie:book_close(Book1), - {ok, Book2} = leveled_bookie:book_start(RootPath, - 1000, - 50000000, - testutil:sync_strategy()), - Index1Count = count_termsonindex(BucketBin, - "idx1_bin", - Book2, - ?KEY_ONLY), + {ok, Book2} = + leveled_bookie:book_start( + RootPath, 1000, 50000000, testutil:sync_strategy()), + Index1Count = + count_termsonindex( + BucketBin, <<"idx1_bin">>, Book2, ?KEY_ONLY), NameList = testutil:name_list(), - TotalNameByName = lists:foldl(fun({_X, Name}, Acc) -> - {ok, Regex} = re:compile("[0-9]+" ++ - Name), - SW = os:timestamp(), - T = count_termsonindex(BucketBin, - "idx1_bin", - Book2, - {false, - Regex}), - TD = timer:now_diff(os:timestamp(), - SW), - io:format("~w terms found on " ++ - "index idx1 with a " ++ - "regex in ~w " ++ - "microseconds~n", - [T, TD]), - Acc + T - end, - 0, - NameList), - ok = case TotalNameByName of - Index1Count -> - ok + TotalNameByName = + lists:foldl( + fun({_X, Name}, Acc) -> + {ok, Regex} = + re:compile("[0-9]+" ++ Name), + SW = os:timestamp(), + T = + count_termsonindex( + BucketBin, + list_to_binary("idx1_bin"), + Book2, + {false, Regex}), + TD = timer:now_diff(os:timestamp(), SW), + io:format( + "~w terms found on index idx1 with a " + "regex in ~w microseconds~n", + [T, TD]), + Acc + T end, + 0, + NameList), + true = TotalNameByName == Index1Count, {ok, RegMia} = re:compile("[0-9]+Mia"), Query1 = {index_query, BucketBin, {fun testutil:foldkeysfun/3, []}, - {"idx2_bin", "2000", "2000|"}, + {<<"idx2_bin">>, <<"2000">>, <<"2000|">>}, {false, RegMia}}, {async, Mia2KFolder1} = leveled_bookie:book_returnfolder(Book2, Query1), @@ -682,7 +672,7 @@ query_count(_Config) -> Query2 = {index_query, BucketBin, {fun testutil:foldkeysfun/3, []}, - {"idx2_bin", "2000", "2001"}, + {<<"idx2_bin">>, <<"2000">>, <<"2001">>}, {true, undefined}}, {async, Mia2KFolder2} = leveled_bookie:book_returnfolder(Book2, Query2), @@ -705,7 +695,7 @@ query_count(_Config) -> Query3 = {index_query, BucketBin, {fun testutil:foldkeysfun/3, []}, - {"idx2_bin", "1980", "2100"}, + {<<"idx2_bin">>, <<"1980">>, <<"2100">>}, {false, RxMia2K}}, {async, Mia2KFolder3} = leveled_bookie:book_returnfolder(Book2, Query3), @@ -713,26 +703,26 @@ query_count(_Config) -> V9 = testutil:get_compressiblevalue(), Indexes9 = testutil:get_randomindexes_generator(8), - [{_RN, Obj9, Spc9}] = testutil:generate_objects(1, - binary_uuid, - [], - V9, - Indexes9), + [{_RN, Obj9, Spc9}] = + testutil:generate_objects( + 1, binary_uuid, [], V9, Indexes9), ok = testutil:book_riakput(Book2, Obj9, Spc9), - R9 = lists:map(fun({add, IdxF, IdxT}) -> - Q = {index_query, - BucketBin, - {fun testutil:foldkeysfun/3, []}, - {IdxF, IdxT, IdxT}, - ?KEY_ONLY}, - R = leveled_bookie:book_returnfolder(Book2, Q), - {async, Fldr} = R, - case length(Fldr()) of - X when X > 0 -> - {IdxF, IdxT, X} - end - end, - Spc9), + R9 = + lists:map( + fun({add, IdxF, IdxT}) -> + Q = {index_query, + BucketBin, + {fun testutil:foldkeysfun/3, []}, + {IdxF, IdxT, IdxT}, + ?KEY_ONLY}, + R = leveled_bookie:book_returnfolder(Book2, Q), + {async, Fldr} = R, + case length(Fldr()) of + X when X > 0 -> + {IdxF, IdxT, X} + end + end, + Spc9), Spc9Del = lists:map(fun({add, IdxF, IdxT}) -> {remove, IdxF, IdxT} end, Spc9), ok = testutil:book_riakput(Book2, Obj9, Spc9Del), @@ -751,44 +741,44 @@ query_count(_Config) -> end, R9), ok = leveled_bookie:book_close(Book2), - {ok, Book3} = leveled_bookie:book_start(RootPath, - 2000, - 50000000, - testutil:sync_strategy()), - lists:foreach(fun({IdxF, IdxT, X}) -> - Q = {index_query, - BucketBin, - {fun testutil:foldkeysfun/3, []}, - {IdxF, IdxT, IdxT}, - ?KEY_ONLY}, - R = leveled_bookie:book_returnfolder(Book3, Q), - {async, Fldr} = R, - case length(Fldr()) of - Y -> - Y = X - 1 - end - end, - R9), + {ok, Book3} = + leveled_bookie:book_start( + RootPath, 2000, 50000000, testutil:sync_strategy()), + lists:foreach( + fun({IdxF, IdxT, X}) -> + Q = {index_query, + BucketBin, + {fun testutil:foldkeysfun/3, []}, + {IdxF, IdxT, IdxT}, + ?KEY_ONLY}, + R = leveled_bookie:book_returnfolder(Book3, Q), + {async, Fldr} = R, + case length(Fldr()) of + Y -> + Y = X - 1 + end + end, + R9), ok = testutil:book_riakput(Book3, Obj9, Spc9), ok = leveled_bookie:book_close(Book3), - {ok, Book4} = leveled_bookie:book_start(RootPath, - 2000, - 50000000, - testutil:sync_strategy()), - lists:foreach(fun({IdxF, IdxT, X}) -> - Q = {index_query, - BucketBin, - {fun testutil:foldkeysfun/3, []}, - {IdxF, IdxT, IdxT}, - ?KEY_ONLY}, - R = leveled_bookie:book_returnfolder(Book4, Q), - {async, Fldr} = R, - case length(Fldr()) of - X -> - ok - end - end, - R9), + {ok, Book4} = + leveled_bookie:book_start( + RootPath, 2000, 50000000, testutil:sync_strategy()), + lists:foreach( + fun({IdxF, IdxT, X}) -> + Q = {index_query, + BucketBin, + {fun testutil:foldkeysfun/3, []}, + {IdxF, IdxT, IdxT}, + ?KEY_ONLY}, + R = leveled_bookie:book_returnfolder(Book4, Q), + {async, Fldr} = R, + case length(Fldr()) of + X -> + ok + end + end, + R9), testutil:check_forobject(Book4, TestObject), FoldBucketsFun = fun(B, Acc) -> sets:add_element(B, Acc) end, @@ -803,24 +793,15 @@ query_count(_Config) -> true = sets:size(BucketSet1) == 1, - ObjList10A = testutil:generate_objects(5000, - binary_uuid, - [], - V9, - Indexes9, - "BucketA"), - ObjList10B = testutil:generate_objects(5000, - binary_uuid, - [], - V9, - Indexes9, - "BucketB"), - ObjList10C = testutil:generate_objects(5000, - binary_uuid, - [], - V9, - Indexes9, - "BucketC"), + ObjList10A = + testutil:generate_objects( + 5000, binary_uuid, [], V9, Indexes9, "BucketA"), + ObjList10B = + testutil:generate_objects( + 5000, binary_uuid, [], V9, Indexes9, "BucketB"), + ObjList10C = + testutil:generate_objects( + 5000, binary_uuid, [], V9, Indexes9, "BucketC"), testutil:riakload(Book4, ObjList10A), testutil:riakload(Book4, ObjList10B), testutil:riakload(Book4, ObjList10C), @@ -847,31 +828,30 @@ query_count(_Config) -> ok = leveled_bookie:book_close(Book5), testutil:reset_filestructure(). - count_termsonindex(Bucket, IdxField, Book, QType) -> - lists:foldl(fun(X, Acc) -> - SW = os:timestamp(), - ST = integer_to_list(X), - ET = ST ++ "|", - Q = {index_query, - Bucket, - {fun testutil:foldkeysfun/3, []}, - {IdxField, ST, ET}, - QType}, - R = leveled_bookie:book_returnfolder(Book, Q), - {async, Folder} = R, - Items = length(Folder()), - io:format("2i query from term ~s on index ~s took " ++ - "~w microseconds~n", - [ST, - IdxField, - timer:now_diff(os:timestamp(), SW)]), - Acc + Items - end, - 0, - lists:seq(190, 221)). + lists:foldl( + fun(X, Acc) -> + SW = os:timestamp(), + ST = list_to_binary(integer_to_list(X)), + Pipe = <<"|">>, + ET = <>, + Q = {index_query, + Bucket, + {fun testutil:foldkeysfun/3, []}, + {IdxField, ST, ET}, + QType}, + R = leveled_bookie:book_returnfolder(Book, Q), + {async, Folder} = R, + Items = length(Folder()), + io:format( + "2i query from term ~s on index ~s took ~w microseconds~n", + [ST, IdxField, timer:now_diff(os:timestamp(), SW)]), + Acc + Items + end, + 0, + lists:seq(190, 221)). multibucket_fold(_Config) -> RootPath = testutil:reset_filestructure(), diff --git a/test/end_to_end/perf_SUITE.erl b/test/end_to_end/perf_SUITE.erl index 9a3fabd1..98055830 100644 --- a/test/end_to_end/perf_SUITE.erl +++ b/test/end_to_end/perf_SUITE.erl @@ -3,10 +3,25 @@ -define(INFO, info). -export([all/0, suite/0]). -export([ - riak_ctperf/1, riak_fullperf/1, riak_profileperf/1 + riak_ctperf/1, riak_fullperf/1, riak_profileperf/1, riak_miniperf/1 ]). -all() -> [riak_ctperf]. +-define(PEOPLE_INDEX, <<"people_bin">>). +-define(MINI_QUERY_DIVISOR, 8). +-define(RGEX_QUERY_DIVISOR, 32). + +-ifndef(performance). + -define(performance, riak_ctperf). +-endif. +all() -> [?performance]. + +-if(?performance == riak_profileperf andalso ?OTP_RELEASE >= 24). + % Requires map functions from OTP 24 + -define(ACCOUNTING, true). +-else. + -define(ACCOUNTING, false). +-endif. + suite() -> [{timetrap, {hours, 16}}]. riak_fullperf(_Config) -> @@ -24,16 +39,16 @@ riak_fullperf(ObjSize, PM, LC) -> output_result(R5A), R5B = riak_load_tester(Bucket, 5000000, ObjSize, [], PM, LC), output_result(R5B), - R10 = riak_load_tester(Bucket, 10000000, ObjSize, [], PM, LC), + R10 = riak_load_tester(Bucket, 8000000, ObjSize, [], PM, LC), output_result(R10) . riak_profileperf(_Config) -> riak_load_tester( {<<"SensibleBucketTypeName">>, <<"SensibleBucketName0">>}, - 2000000, + 1200000, 2048, - [load, head, get, query, mini_query, full, guess, estimate, update], + [load, head, get, query, mini_query, regex_query, full, guess, estimate, update], zstd, as_store ). @@ -42,12 +57,18 @@ riak_profileperf(_Config) -> riak_ctperf(_Config) -> riak_load_tester(<<"B0">>, 400000, 1024, [], native, as_store). +riak_miniperf(_Config) -> + Bucket = {<<"SensibleBucketTypeName">>, <<"SensibleBucketName0">>}, + R2A = riak_load_tester(Bucket, 2000000, 2048, [], zstd, as_store), + output_result(R2A). + riak_load_tester(Bucket, KeyCount, ObjSize, ProfileList, PM, LC) -> ct:log( ?INFO, "Basic riak test with KeyCount ~w ObjSize ~w PressMethod ~w Ledger ~w", [KeyCount, ObjSize, PM, LC] ), + IndexCount = 100000, GetFetches = KeyCount div 4, @@ -74,6 +95,7 @@ riak_load_tester(Bucket, KeyCount, ObjSize, ProfileList, PM, LC) -> IntIndex = "integer" ++ integer_to_list(ListID) ++ "_int", BinIndex = "binary" ++ integer_to_list(ListID) ++ "_bin", [{add, list_to_binary(IntIndex), RandInt}, + {add, ?PEOPLE_INDEX, list_to_binary(random_people_index())}, {add, list_to_binary(IntIndex), RandInt + 1}, {add, list_to_binary(BinIndex), <>}, {add, list_to_binary(BinIndex), <<(RandInt + 1):32/integer>>}] @@ -82,6 +104,8 @@ riak_load_tester(Bucket, KeyCount, ObjSize, ProfileList, PM, LC) -> CountPerList = KeyCount div 10, + LoadMemoryTracker = memory_tracking(load, 1000), + LoadAccountant = accounting(load, 10000, ProfileList), TC4 = load_chunk(Bookie1, CountPerList, ObjSize, IndexGenFun, Bucket, 4), TC1 = load_chunk(Bookie1, CountPerList, ObjSize, IndexGenFun, Bucket, 1), TC9 = load_chunk(Bookie1, CountPerList, ObjSize, IndexGenFun, Bucket, 9), @@ -92,6 +116,8 @@ riak_load_tester(Bucket, KeyCount, ObjSize, ProfileList, PM, LC) -> TC3 = load_chunk(Bookie1, CountPerList, ObjSize, IndexGenFun, Bucket, 3), TC7 = load_chunk(Bookie1, CountPerList, ObjSize, IndexGenFun, Bucket, 7), TC10 = load_chunk(Bookie1, CountPerList, ObjSize, IndexGenFun, Bucket, 10), + ok = stop_accounting(LoadAccountant), + {MT0, MP0, MB0} = stop_tracker(LoadMemoryTracker), ct:log( ?INFO, @@ -104,20 +130,23 @@ riak_load_tester(Bucket, KeyCount, ObjSize, ProfileList, PM, LC) -> (TC1 + TC2 + TC3 + TC4 + TC5 + TC6 + TC7 + TC8 + TC9 + TC10) div 1000, ct:log(?INFO, "Total load time ~w ms", [TotalLoadTime]), - {MT0, MP0, MB0} = memory_usage(), - + HeadMemoryTracker = memory_tracking(head, 1000), + HeadAccountant = accounting(head, 2000, ProfileList), TotalHeadTime = random_fetches(head, Bookie1, Bucket, KeyCount, HeadFetches), - - {MT1, MP1, MB1} = memory_usage(), + ok = stop_accounting(HeadAccountant), + {MT1, MP1, MB1} = stop_tracker(HeadMemoryTracker), + GetMemoryTracker = memory_tracking(get, 1000), + GetAccountant = accounting(get, 3000, ProfileList), TotalGetTime = random_fetches(get, Bookie1, Bucket, KeyCount, GetFetches), + ok = stop_accounting(GetAccountant), + {MT2, MP2, MB2} = stop_tracker(GetMemoryTracker), - {MT2, MP2, MB2} = memory_usage(), - + QueryMemoryTracker = memory_tracking(query, 1000), + QueryAccountant = accounting(query, 1000, ProfileList), QuerySize = max(10, IndexCount div 1000), - MiniQuerySize = max(1, IndexCount div 50000), TotalQueryTime = random_queries( Bookie1, @@ -126,6 +155,12 @@ riak_load_tester(Bucket, KeyCount, ObjSize, ProfileList, PM, LC) -> IndexCount, QuerySize, IndexesReturned), + ok = stop_accounting(QueryAccountant), + {MT3a, MP3a, MB3a} = stop_tracker(QueryMemoryTracker), + + MiniQueryMemoryTracker = memory_tracking(mini_query, 1000), + MiniQueryAccountant = accounting(mini_query, 1000, ProfileList), + MiniQuerySize = max(1, IndexCount div 50000), TotalMiniQueryTime = random_queries( Bookie1, @@ -133,18 +168,76 @@ riak_load_tester(Bucket, KeyCount, ObjSize, ProfileList, PM, LC) -> 10, IndexCount, MiniQuerySize, - IndexesReturned div 8), + IndexesReturned div ?MINI_QUERY_DIVISOR), + ok = stop_accounting(MiniQueryAccountant), + {MT3b, MP3b, MB3b} = stop_tracker(MiniQueryMemoryTracker), + + RegexQueryMemoryTracker = memory_tracking(regex_query, 1000), + RegexQueryAccountant = accounting(regex_query, 2000, ProfileList), + RegexQueryTime = + random_people_queries( + Bookie1, + Bucket, + IndexesReturned div ?RGEX_QUERY_DIVISOR), + ok = stop_accounting(RegexQueryAccountant), + {MT3c, MP3c, MB3c} = stop_tracker(RegexQueryMemoryTracker), - {MT3, MP3, MB3} = memory_usage(), + GuessMemoryTracker = memory_tracking(guess, 1000), + GuessAccountant = accounting(guess, 1000, ProfileList), + {GuessTime, GuessCount} = + lists:foldl( + fun(_I, {TSAcc, CountAcc}) -> + {TS, Count} = counter(Bookie1, guess), + {TSAcc + TS, CountAcc + Count} + end, + {0, 0}, + lists:seq(1, 60) + ), + ok = stop_accounting(GuessAccountant), + {MT4a, MP4a, MB4a} = stop_tracker(GuessMemoryTracker), - {FullFoldTime, SegFoldTime} = size_estimate_summary(Bookie1), + EstimateMemoryTracker = memory_tracking(estimate, 1000), + EstimateAccountant = accounting(estimate, 1000, ProfileList), + {EstimateTime, EstimateCount} = + lists:foldl( + fun(_I, {TSAcc, CountAcc}) -> + {TS, Count} = counter(Bookie1, estimate), + {TSAcc + TS, CountAcc + Count} + end, + {0, 0}, + lists:seq(1, 40) + ), + ok = stop_accounting(EstimateAccountant), + {MT4b, MP4b, MB4b} = stop_tracker(EstimateMemoryTracker), - {MT4, MP4, MB4} = memory_usage(), + SegFoldTime = (GuessTime + EstimateTime) div 1000, + + FullFoldMemoryTracker = memory_tracking(full, 1000), + FullFoldAccountant = accounting(full, 2000, ProfileList), + {FullFoldTime, FullFoldCount} = + lists:foldl( + fun(_I, {TSAcc, CountAcc}) -> + {TS, Count} = counter(Bookie1, full), + {TSAcc + TS, CountAcc + Count} + end, + {0, 0}, + lists:seq(1, 5) + ), + ok = stop_accounting(FullFoldAccountant), + {MT5, MP5, MB5} = stop_tracker(FullFoldMemoryTracker), + ct:log( + info, + "Guess size ~w Estimate size ~w Actual size ~w", + [GuessCount div 60, EstimateCount div 40, FullFoldCount div 10] + ), + + UpdateMemoryTracker = memory_tracking(update, 1000), + UpdateAccountant = accounting(update, 1000, ProfileList), TotalUpdateTime = rotate_chunk(Bookie1, <<"UpdBucket">>, KeyCount div 50, ObjSize), - - {MT5, MP5, MB5} = memory_usage(), + ok = stop_accounting(UpdateAccountant), + {MT6, MP6, MB6} = stop_tracker(UpdateMemoryTracker), DiskSpace = lists:nth(1, string:tokens(os:cmd("du -sh riakLoad"), "\t")), ct:log(?INFO, "Disk space taken by test ~s", [DiskSpace]), @@ -172,8 +265,9 @@ riak_load_tester(Bucket, KeyCount, ObjSize, ProfileList, PM, LC) -> P -> P end, + io:format(user, "~nProfile ~p:~n", [P]), ProFun = profile_fun(P0, ProfileData), - profile_test(Bookie1, ProFun) + profile_test(Bookie1, ProFun, P) end, ProfileList), @@ -183,21 +277,26 @@ riak_load_tester(Bucket, KeyCount, ObjSize, ProfileList, PM, LC) -> {KeyCount, ObjSize, {PM, LC}, TotalLoadTime, TotalHeadTime, TotalGetTime, - TotalQueryTime, TotalMiniQueryTime, FullFoldTime, SegFoldTime, + TotalQueryTime, TotalMiniQueryTime, RegexQueryTime, + FullFoldTime div 1000, SegFoldTime, TotalUpdateTime, DiskSpace, - {(MT0 + MT1 + MT2 + MT3 + MT4 + MT5) div 6000000, - (MP0 + MP1 + MP2 + MP3 + MP4 + MP5) div 6000000, - (MB0 + MB1 + MB2 + MB3 + MB4 + MB5) div 6000000}, + {(MT0 + MT1 + MT2 + MT3a + MT3b + MT3c + MT4a + MT4b + MT5 + MT6) + div 9, + (MP0 + MP1 + MP2 + MP3a + MP3b + MP3c + MP4a + MP4b + MP5 + MP6) + div 9, + (MB0 + MB1 + MB2 + MB3a + MB3b + MB3c + MB4a + MB4b + MB5 + MB6) + div 9}, SSTPids, CDBPids}. - -profile_test(Bookie, ProfileFun) -> +profile_test(Bookie, ProfileFun, P) -> {Inker, Pcl, SSTPids, PClerk, CDBPids, IClerk} = get_pids(Bookie), TestPid = self(), profile_app( [TestPid, Bookie, Inker, IClerk, Pcl, PClerk] ++ SSTPids ++ CDBPids, - ProfileFun). + ProfileFun, + P + ). get_pids(Bookie) -> {ok, Inker, Pcl} = leveled_bookie:book_returnactors(Bookie), @@ -211,7 +310,8 @@ output_result( {KeyCount, ObjSize, PressMethod, TotalLoadTime, TotalHeadTime, TotalGetTime, - TotalQueryTime, TotalMiniQueryTime, TotalFullFoldTime, TotalSegFoldTime, + TotalQueryTime, TotalMiniQueryTime, RegexQueryTime, + TotalFullFoldTime, TotalSegFoldTime, TotalUpdateTime, DiskSpace, {TotalMemoryMB, ProcessMemoryMB, BinaryMemoryMB}, @@ -227,6 +327,7 @@ output_result( "TotalGetTime - ~w ms~n" "TotalQueryTime - ~w ms~n" "TotalMiniQueryTime - ~w ms~n" + "TotalRegexQueryTime - ~w ms~n" "TotalFullFoldTime - ~w ms~n" "TotalAAEFoldTime - ~w ms~n" "TotalUpdateTime - ~w ms~n" @@ -236,7 +337,8 @@ output_result( "Closing count of CDB Files - ~w~n", [KeyCount, ObjSize, PressMethod, TotalLoadTime, TotalHeadTime, TotalGetTime, - TotalQueryTime, TotalMiniQueryTime, TotalFullFoldTime, TotalSegFoldTime, + TotalQueryTime, TotalMiniQueryTime, RegexQueryTime, + TotalFullFoldTime, TotalSegFoldTime, TotalUpdateTime, DiskSpace, TotalMemoryMB, ProcessMemoryMB, BinaryMemoryMB, @@ -244,13 +346,12 @@ output_result( ). memory_usage() -> - garbage_collect(), % GC the test process MemoryUsage = erlang:memory(), {element(2, lists:keyfind(total, 1, MemoryUsage)), element(2, lists:keyfind(processes, 1, MemoryUsage)), element(2, lists:keyfind(binary, 1, MemoryUsage))}. -profile_app(Pids, ProfiledFun) -> +profile_app(Pids, ProfiledFun, P) -> eprof:start(), eprof:start_profiling(Pids), @@ -258,40 +359,12 @@ profile_app(Pids, ProfiledFun) -> ProfiledFun(), eprof:stop_profiling(), - eprof:analyze(total), - eprof:stop(). - -size_estimate_summary(Bookie) -> - Loops = 10, - ct:log( - ?INFO, - "Size Estimate Tester (SET) started with Loops ~w", - [Loops] - ), - {{TotalGuessTime, TotalEstimateTime, TotalCountTime}, - {TotalEstimateVariance, TotalGuessVariance}} = - lists:foldl( - fun(_I, {{GT, ET, CT}, {AET, AGT}}) -> - {{GT0, ET0, CT0}, {AE0, AG0}} = size_estimate_tester(Bookie), - {{GT + GT0, ET + ET0, CT + CT0}, {AET + AE0, AGT + AG0}} - end, - {{0, 0, 0}, {0, 0}}, - lists:seq(1, Loops) - ), - ct:log( - ?INFO, - "SET: MeanGuess ~w ms MeanEstimate ~w ms MeanCount ~w ms", - [TotalGuessTime div 10000, - TotalEstimateTime div 10000, - TotalCountTime div 10000] - ), - ct:log( - ?INFO, - "Mean variance in Estimate ~w Guess ~w", - [TotalEstimateVariance div Loops, TotalGuessVariance div Loops] - ), - %% Assume that segment-list folds are 10 * as common as all folds - {TotalCountTime div 1000, (TotalGuessTime + TotalEstimateTime) div 1000}. + eprof:log(atom_to_list(P) ++ ".log"), + eprof:analyze(total, [{filter, [{time, 150000}]}]), + eprof:stop(), + {ok, Analysis} = file:read_file(atom_to_list(P) ++ ".log"), + io:format(user, "~n~s~n", [Analysis]) + . rotate_chunk(Bookie, Bucket, KeyCount, ObjSize) -> @@ -311,15 +384,6 @@ rotate_chunk(Bookie, Bucket, KeyCount, ObjSize) -> end), TC div 1000. -load_chunk(Bookie, CountPerList, ObjSize, IndexGenFun, Bucket, Chunk) -> - ct:log(?INFO, "Generating and loading ObjList ~w", [Chunk]), - ObjList = - generate_chunk(CountPerList, ObjSize, IndexGenFun, Bucket, Chunk), - {TC, ok} = timer:tc(fun() -> testutil:riakload(Bookie, ObjList) end), - garbage_collect(), - timer:sleep(2000), - TC. - generate_chunk(CountPerList, ObjSize, IndexGenFun, Bucket, Chunk) -> testutil:generate_objects( CountPerList, @@ -329,31 +393,60 @@ generate_chunk(CountPerList, ObjSize, IndexGenFun, Bucket, Chunk) -> Bucket ). -size_estimate_tester(Bookie) -> - %% Data size test - calculate data size, then estimate data size - {CountTS, Count} = counter(Bookie, full), - {CountTSEstimate, CountEstimate} = counter(Bookie, estimate), - {CountTSGuess, CountGuess} = counter(Bookie, guess), - {GuessTolerance, EstimateTolerance} = - case Count of - C when C < 500000 -> - {0.20, 0.15}; - C when C < 1000000 -> - {0.12, 0.1}; - C when C < 2000000 -> - {0.1, 0.08}; - _C -> - {0.08, 0.05} - end, +load_chunk(Bookie, CountPerList, ObjSize, IndexGenFun, Bucket, Chunk) -> + ct:log(?INFO, "Generating and loading ObjList ~w", [Chunk]), + time_load_chunk( + Bookie, + {Bucket, base64:encode(leveled_rand:rand_bytes(ObjSize)), IndexGenFun(Chunk)}, + (Chunk - 1) * CountPerList + 1, + Chunk * CountPerList, + 0, + 0 + ). - true = - ((CountGuess / Count) > (1.0 - GuessTolerance)) - and ((CountGuess / Count) < (1.0 + GuessTolerance)), - true = - ((CountEstimate / Count) > (1.0 - EstimateTolerance)) - and ((CountEstimate / Count) < (1.0 + EstimateTolerance)), - {{CountTSGuess, CountTSEstimate, CountTS}, - {abs(CountEstimate - Count), abs(CountGuess - Count)}}. +time_load_chunk( + _Bookie, _ObjDetails, KeyNumber, TopKey, TotalTime, PC) + when KeyNumber > TopKey -> + garbage_collect(), + timer:sleep(2000), + ct:log( + ?INFO, + "Count of ~w pauses during chunk load", + [PC] + ), + TotalTime; +time_load_chunk( + Bookie, {Bucket, Value, IndexGen}, KeyNumber, TopKey, TotalTime, PC) -> + ThisProcess = self(), + spawn( + fun() -> + {RiakObj, IndexSpecs} = + testutil:set_object( + Bucket, testutil:fixed_bin_key(KeyNumber), Value, IndexGen, []), + {TC, R} = + timer:tc( + testutil, book_riakput, [Bookie, RiakObj, IndexSpecs] + ), + case R of + ok -> + ThisProcess! {TC, 0}; + pause -> + timer:sleep(40), + ThisProcess ! {TC + 40000, 1} + end + end + ), + receive + {PutTime, Pause} -> + time_load_chunk( + Bookie, + {Bucket, Value, IndexGen}, + KeyNumber + 1, + TopKey, + TotalTime + PutTime, + PC + Pause + ) + end. counter(Bookie, full) -> {async, DataSizeCounter} = @@ -471,6 +564,42 @@ random_queries(Bookie, Bucket, IDs, IdxCnt, MaxRange, IndexesReturned) -> TC div 1000. +random_people_queries(Bookie, Bucket, IndexesReturned) -> + SeventiesWillowRegex = + "[^\\|]*\\|197[0-9]{5}\\|[^\\|]*\\|" + "[^\\|]*#Willow[^\\|]*\\|[^\\|]*#LS[^\\|]*", + %% born in the 70s with Willow as a given name + QueryFun = + fun() -> + Surname = get_random_surname(), + Range = + {?PEOPLE_INDEX, + Surname, + <> + }, + {ok, TermRegex} = + re:compile(SeventiesWillowRegex), + FoldKeysFun = fun(_B, _K, Cnt) -> Cnt + 1 end, + {async, R} = + leveled_bookie:book_indexfold( + Bookie, + {Bucket, <<>>}, + {FoldKeysFun, 0}, + Range, + {true, TermRegex}), + R() + end, + + {TC, {QC, EF}} = + timer:tc(fun() -> run_queries(QueryFun, 0, 0, IndexesReturned) end), + ct:log( + ?INFO, + "Fetch of ~w index entries by regex in ~w queries took ~w ms", + [EF, QC, TC div 1000] + ), + TC div 1000. + + run_queries(_QueryFun, QueryCount, EntriesFound, TargetEntries) when EntriesFound >= TargetEntries -> {QueryCount, EntriesFound}; @@ -486,7 +615,8 @@ profile_fun( {Bookie, Bucket, _KeyCount, _ObjSize, IndexCount, IndexesReturned}) -> fun() -> random_queries( - Bookie, Bucket, 10, IndexCount, QuerySize, IndexesReturned div 8) + Bookie, Bucket, 10, IndexCount, QuerySize, + IndexesReturned div ?MINI_QUERY_DIVISOR) end; profile_fun( {query, QuerySize}, @@ -495,6 +625,13 @@ profile_fun( random_queries( Bookie, Bucket, 10, IndexCount, QuerySize, IndexesReturned) end; +profile_fun( + regex_query, + {Bookie, Bucket, _KeyCount, _ObjSize, _IndexCount, IndexesReturned}) -> + fun() -> + random_people_queries( + Bookie, Bucket, IndexesReturned div ?RGEX_QUERY_DIVISOR) + end; profile_fun( {head, HeadFetches}, {Bookie, Bucket, KeyCount, _ObjSize, _IndexCount, _IndexesReturned}) -> @@ -524,11 +661,230 @@ profile_fun( profile_fun( CounterFold, {Bookie, _Bucket, _KeyCount, _ObjSize, _IndexCount, _IndexesReturned}) -> + Runs = + case CounterFold of + full -> + 20; + estimate -> + 40; + guess -> + 100 + end, fun() -> lists:foreach( fun(_I) -> _ = counter(Bookie, CounterFold) end, - lists:seq(1, 10) + lists:seq(1, Runs) ) end. + +random_people_index() -> + io_lib:format( + "~s|~s|~s|#~s#~s#~s|#~s#~s#~s", + [get_random_surname(), + get_random_dob(), + get_random_dod(), + get_random_givenname(), get_random_givenname(), get_random_givenname(), + get_random_postcode(), get_random_postcode(), get_random_postcode() + ] + ). + +get_random_surname() -> + lists:nth( + rand:uniform(100), + [<<"Smith">>, <<"Jones">>, <<"Taylor">>, <<"Brown">>, <<"Williams">>, + <<"Wilson">>, <<"Johnson">>, <<"Davies">>, <<"Patel">>, <<"Robinson">>, + <<"Wright">>, <<"Thompson">>, <<"Evans">>, <<"Walker">>, <<"White">>, + <<"Roberts">>, <<"Green">>, <<"Hall">>, <<"Thomas">>, <<"Clarke">>, + <<"Jackson">>, <<"Wood">>, <<"Harris">>, <<"Edwards">>, <<"Turner">>, + <<"Martin">>, <<"Cooper">>, <<"Hill">>, <<"Ward">>, <<"Hughes">>, + <<"Moore">>, <<"Clark">>, <<"King">>, <<"Harrison">>, <<"Lewis">>, + <<"Baker">>, <<"Lee">>, <<"Allen">>, <<"Morris">>, <<"Khan">>, + <<"Scott">>, <<"Watson">>, <<"Davis">>, <<"Parker">>, <<"James">>, + <<"Bennett">>, <<"Young">>, <<"Phillips">>, <<"Richardson">>, <<"Mitchell">>, + <<"Bailey">>, <<"Carter">>, <<"Cook">>, <<"Singh">>, <<"Shaw">>, + <<"Bell">>, <<"Collins">>, <<"Morgan">>, <<"Kelly">>, <<"Begum">>, + <<"Miller">>, <<"Cox">>, <<"Hussain">>, <<"Marshall">>, <<"Simpson">>, + <<"Price">>, <<"Anderson">>, <<"Adams">>, <<"Wilkinson">>, <<"Ali">>, + <<"Ahmed">>, <<"Foster">>, <<"Ellis">>, <<"Murphy">>, <<"Chapman">>, + <<"Mason">>, <<"Gray">>, <<"Richards">>, <<"Webb">>, <<"Griffiths">>, + <<"Hunt">>, <<"Palmer">>, <<"Campbell">>, <<"Holmes">>, <<"Mills">>, + <<"Rogers">>, <<"Barnes">>, <<"Knight">>, <<"Matthews">>, <<"Barker">>, + <<"Powell">>, <<"Stevens">>, <<"Kaur">>, <<"Fisher">>, <<"Butler">>, + <<"Dixon">>, <<"Russell">>, <<"Harvey">>, <<"Pearson">>, <<"Graham">>] + ). + +get_random_givenname() -> + lists:nth( + rand:uniform(20), + [<<"Noah">>, <<"Oliver">>, <<"George">>, <<"Arthur">>, <<"Muhammad">>, + <<"Leo">>, <<"Harry">>, <<"Oscar">> , <<"Archie">>, <<"Henry">>, + <<"Olivia">>, <<"Amelia">>, <<"Isla">>, <<"Ava">>, <<"Ivy">>, + <<"Freya">>, <<"Lily">>, <<"Florence">>, <<"Mia">>, <<"Willow">> + ]). + +get_random_dob() -> + io_lib:format( + "~4..0B~2..0B~2..0B", + [1900 + rand:uniform(99), rand:uniform(12), rand:uniform(28)] + ). + +get_random_dod() -> + io_lib:format( + "~4..0B~2..0B~2..0B", + [2000 + rand:uniform(20), rand:uniform(12), rand:uniform(28)] + ). + +get_random_postcode() -> + io_lib:format( + "LS~w ~wXX", [rand:uniform(26), rand:uniform(9)] + ). + + +memory_tracking(Phase, Timeout) -> + spawn( + fun() -> + memory_tracking(Phase, Timeout, {0, 0, 0}, 0) + end + ). + +memory_tracking(Phase, Timeout, {TAcc, PAcc, BAcc}, Loops) -> + receive + {stop, Caller} -> + {T, P, B} = memory_usage(), + TAvg = (T + TAcc) div ((Loops + 1) * 1000000), + PAvg = (P + PAcc) div ((Loops + 1) * 1000000), + BAvg = (B + BAcc) div ((Loops + 1) * 1000000), + print_memory_stats(Phase, TAvg, PAvg, BAvg), + Caller ! {TAvg, PAvg, BAvg} + after Timeout -> + {T, P, B} = memory_usage(), + memory_tracking( + Phase, Timeout, {TAcc + T, PAcc + P, BAcc + B}, Loops + 1) + end. + + +-if(?performance == riak_ctperf). +print_memory_stats(_Phase, _TAvg, _PAvg, _BAvg) -> + ok. +-else. +print_memory_stats(Phase, TAvg, PAvg, BAvg) -> + io:format( + user, + "~nFor ~w memory stats: total ~wMB process ~wMB binary ~wMB~n", + [Phase, TAvg, PAvg, BAvg] + ). +-endif. + +dummy_accountant() -> + spawn(fun() -> receive {stop, Caller} -> Caller ! ok end end). + +stop_accounting(Accountant) -> + Accountant ! {stop, self()}, + receive ok -> ok end. + +stop_tracker(Tracker) -> + garbage_collect(), + % Garbage collect the test process, before getting the memory stats + Tracker ! {stop, self()}, + receive MemStats -> MemStats end. + +-if(?ACCOUNTING). + +-define(ACCT_TYPES, [scheduler, dirty_io_scheduler, dirty_cpu_scheduler, aux]). + +accounting(Phase, Timeout, ProfileList) -> + case lists:member(Phase, ProfileList) of + true -> + ZeroCounters = + #{ + emulator => 0, + aux => 0, + check_io => 0, + gc => 0, + other => 0 + }, + InitCounters = + lists:map(fun(T) -> {T, ZeroCounters} end, ?ACCT_TYPES), + spawn( + fun() -> + accounting(Phase, Timeout, maps:from_list(InitCounters), 0) + end + ); + false -> + dummy_accountant() + end. + +accounting(Phase, Timeout, Counters, Loops) -> + receive + {stop, Caller} -> + io:format( + user, + "~n~nStats for Phase ~p after loops ~p:~n", + [Phase, Loops] + ), + lists:foreach( + fun(S) -> + scheduler_output(S, maps:get(S, Counters)) + end, + ?ACCT_TYPES + ), + Caller ! ok + after Timeout -> + msacc:start(Timeout div 5), + UpdCounters = + lists:foldl( + fun(StatMap, CountersAcc) -> + Type = maps:get(type, StatMap), + case lists:member(Type, ?ACCT_TYPES) of + true -> + TypeAcc = + maps:intersect_with( + fun(_K, V1, V2) -> V1 + V2 end, + maps:get(counters, StatMap), + maps:get(Type, CountersAcc) + ), + maps:update(Type, TypeAcc, CountersAcc); + false -> + CountersAcc + end + end, + Counters, + msacc:stats() + ), + accounting(Phase, Timeout, UpdCounters, Loops + 1) + end. + +scheduler_output(Scheduler, CounterMap) -> + Total = + maps:get(emulator, CounterMap) + + maps:get(aux, CounterMap) + + maps:get(check_io, CounterMap) + + maps:get(gc, CounterMap) + + maps:get(other, CounterMap), + GC = maps:get(gc, CounterMap), + GCperc = case Total > 0 of true -> GC/Total; false -> 0.0 end, + io:format( + user, + "~nFor ~w:~n" + "emulator=~w, aux=~w, check_io=~w, gc=~w, other=~w~n" + "total ~w~n" + "percentage_gc ~.2f %~n", + [Scheduler, + maps:get(emulator, CounterMap), + maps:get(aux, CounterMap), + maps:get(check_io, CounterMap), + GC, + maps:get(other, CounterMap), + Total, + GCperc + ] + ). + +-else. + +accounting(_Phase, _Timeout, _ProfileList) -> + dummy_accountant(). + +-endif. \ No newline at end of file diff --git a/test/end_to_end/recovery_SUITE.erl b/test/end_to_end/recovery_SUITE.erl index 222a7472..8a74ac16 100644 --- a/test/end_to_end/recovery_SUITE.erl +++ b/test/end_to_end/recovery_SUITE.erl @@ -483,12 +483,14 @@ rotate_wipe_compact(Strategy1, Strategy2) -> {ok, Book3} = leveled_bookie:book_start(BookOptsAlt), {KSpcL2, _V2} = testutil:put_indexed_objects(Book3, "AltBucket6", 3000), - Q2 = fun(RT) -> {index_query, - "AltBucket6", - {fun testutil:foldkeysfun/3, []}, - {"idx1_bin", "#", "|"}, - {RT, undefined}} - end, + Q2 = + fun(RT) -> + {index_query, + "AltBucket6", + {fun testutil:foldkeysfun/3, []}, + {<<"idx1_bin">>, <<"#">>, <<"|">>}, + {RT, undefined}} + end, {async, KFolder2A} = leveled_bookie:book_returnfolder(Book3, Q2(false)), KeyList2A = lists:usort(KFolder2A()), true = length(KeyList2A) == 3000, @@ -629,12 +631,14 @@ recovr_strategy(_Config) -> true = VCH == VCG end, lists:nthtail(6400, AllSpcL)), - Q = fun(RT) -> {index_query, - "Bucket6", - {fun testutil:foldkeysfun/3, []}, - {"idx1_bin", "#", "|"}, - {RT, undefined}} - end, + Q = + fun(RT) -> + {index_query, + "Bucket6", + {fun testutil:foldkeysfun/3, []}, + {<<"idx1_bin">>, <<"#">>, <<"|">>}, + {RT, undefined}} + end, {async, TFolder} = leveled_bookie:book_returnfolder(Book1, Q(true)), KeyTermList = TFolder(), {async, KFolder} = leveled_bookie:book_returnfolder(Book1, Q(false)), @@ -660,12 +664,14 @@ recovr_strategy(_Config) -> KeyList2 = lists:usort(KFolder2()), true = length(KeyList2) == 6400, - Q2 = fun(RT) -> {index_query, - "AltBucket6", - {fun testutil:foldkeysfun/3, []}, - {"idx1_bin", "#", "|"}, - {RT, undefined}} - end, + Q2 = + fun(RT) -> + {index_query, + "AltBucket6", + {fun testutil:foldkeysfun/3, []}, + {<<"idx1_bin">>, <<"#">>, <<"|">>}, + {RT, undefined}} + end, {async, KFolder2A} = leveled_bookie:book_returnfolder(Book2, Q2(false)), KeyList2A = lists:usort(KFolder2A()), true = length(KeyList2A) == 3000, diff --git a/test/end_to_end/testutil.erl b/test/end_to_end/testutil.erl index 2c8dfc7e..f003be3e 100644 --- a/test/end_to_end/testutil.erl +++ b/test/end_to_end/testutil.erl @@ -68,7 +68,7 @@ compact_and_wait/1]). -define(RETURN_TERMS, {true, undefined}). --define(SLOWOFFER_DELAY, 10). +-define(SLOWOFFER_DELAY, 40). -define(V1_VERS, 1). -define(MAGIC, 53). % riak_kv -> riak_object -define(MD_VTAG, <<"X-Riak-VTag">>). @@ -691,21 +691,24 @@ load_objects(ChunkSize, GenList, Bookie, TestObject, Generator, SubListL) -> get_randomindexes_generator(Count) -> - Generator = fun() -> - lists:map(fun(X) -> - {add, - "idx" ++ integer_to_list(X) ++ "_bin", - get_randomdate() ++ get_randomname()} end, - lists:seq(1, Count)) + Generator = + fun() -> + lists:map( + fun(X) -> + {add, + list_to_binary("idx" ++ integer_to_list(X) ++ "_bin"), + list_to_binary(get_randomdate() ++ get_randomname())} + end, + lists:seq(1, Count)) end, Generator. name_list() -> [{1, "Sophia"}, {2, "Emma"}, {3, "Olivia"}, {4, "Ava"}, - {5, "Isabella"}, {6, "Mia"}, {7, "Zoe"}, {8, "Lily"}, - {9, "Emily"}, {10, "Madelyn"}, {11, "Madison"}, {12, "Chloe"}, - {13, "Charlotte"}, {14, "Aubrey"}, {15, "Avery"}, - {16, "Abigail"}]. + {5, "Isabella"}, {6, "Mia"}, {7, "Zoe"}, {8, "Lily"}, + {9, "Emily"}, {10, "Madelyn"}, {11, "Madison"}, {12, "Chloe"}, + {13, "Charlotte"}, {14, "Aubrey"}, {15, "Avery"}, + {16, "Abigail"}]. get_randomname() -> NameList = name_list(), @@ -738,7 +741,7 @@ check_indexed_objects(Book, B, KSpecL, V) -> fun({K, Spc}) -> {ok, O} = book_riakget(Book, B, K), V = testutil:get_value(O), - {add, "idx1_bin", IdxVal} = lists:keyfind(add, 1, Spc), + {add, <<"idx1_bin">>, IdxVal} = lists:keyfind(add, 1, Spc), {IdxVal, K} end, KSpecL), @@ -749,7 +752,7 @@ check_indexed_objects(Book, B, KSpecL, V) -> {index_query, B, {fun foldkeysfun/3, []}, - {"idx1_bin", "0", "|"}, + {<<"idx1_bin">>, <<"0">>, <<"|">>}, ?RETURN_TERMS}), SW = os:timestamp(), {async, Fldr} = R, @@ -796,11 +799,12 @@ put_altered_indexed_objects(Book, Bucket, KSpecL, RemoveOld2i) -> put_altered_indexed_objects(Book, Bucket, KSpecL, RemoveOld2i, V). put_altered_indexed_objects(Book, Bucket, KSpecL, RemoveOld2i, V) -> + SW = os:timestamp(), IndexGen = get_randomindexes_generator(1), - + ThisProcess = self(), FindAdditionFun = fun(SpcItem) -> element(1, SpcItem) == add end, MapFun = - fun({K, Spc}) -> + fun({K, Spc}, Acc) -> OldSpecs = lists:filter(FindAdditionFun, Spc), {RemoveSpc, AddSpc} = case RemoveOld2i of @@ -809,26 +813,45 @@ put_altered_indexed_objects(Book, Bucket, KSpecL, RemoveOld2i, V) -> false -> {[], OldSpecs} end, - {O, DeltaSpecs} = - set_object(Bucket, K, V, - IndexGen, RemoveSpc, AddSpc), - % DeltaSpecs should be new indexes added, and any old indexes which - % have been removed by this change where RemoveOld2i is true. - % - % The actual indexes within the object should reflect any history - % of indexes i.e. when RemoveOld2i is false. - % - % The [{Key, SpecL}] returned should accrue additions over loops if - % RemoveOld2i is false - case book_riakput(Book, O, DeltaSpecs) of - ok -> ok; - pause -> timer:sleep(?SLOWOFFER_DELAY) - end, + PutFun = + fun() -> + {O, DeltaSpecs} = + set_object( + Bucket, K, V, IndexGen, RemoveSpc, AddSpc), + % DeltaSpecs should be new indexes added, and any old + % indexes which have been removed by this change where + % RemoveOld2i is true. + % + % The actual indexes within the object should reflect any + % history of indexes i.e. when RemoveOld2i is false. + % + % The [{Key, SpecL}] returned should accrue additions over + % loops if RemoveOld2i is false + R = + case book_riakput(Book, O, DeltaSpecs) of + ok -> + ok; + pause -> + timer:sleep(?SLOWOFFER_DELAY), + pause + end, + ThisProcess ! {R, DeltaSpecs} + end, + spawn(PutFun), + AccOut = + receive + {ok, NewSpecs} -> Acc; + {pause, NewSpecs} -> Acc + 1 + end, % Note that order in the SpecL is important, as % check_indexed_objects, needs to find the latest item added - {K, DeltaSpecs ++ AddSpc} + {{K, NewSpecs ++ AddSpc}, AccOut} end, - RplKSpecL = lists:map(MapFun, KSpecL), + {RplKSpecL, Pauses} = lists:mapfoldl(MapFun, 0, KSpecL), + io:format( + "Altering ~w objects took ~w ms with ~w pauses~n", + [length(KSpecL), timer:now_diff(os:timestamp(), SW) div 1000, Pauses] + ), {RplKSpecL, V}. rotating_object_check(RootPath, B, NumberOfObjects) -> diff --git a/test/end_to_end/tictac_SUITE.erl b/test/end_to_end/tictac_SUITE.erl index 16ea215d..37b5e1af 100644 --- a/test/end_to_end/tictac_SUITE.erl +++ b/test/end_to_end/tictac_SUITE.erl @@ -378,10 +378,13 @@ index_compare(_Config) -> GetTicTacTreeFun = fun(X, Bookie) -> SW = os:timestamp(), - ST = "!", - ET = "|", + ST = <<"!">>, + ET = <<"|">>, Q = {tictactree_idx, - {BucketBin, "idx" ++ integer_to_list(X) ++ "_bin", ST, ET}, + {BucketBin, + list_to_binary("idx" ++ integer_to_list(X) ++ "_bin"), + ST, + ET}, TreeSize, fun(_B, _K) -> accumulate end}, {async, Folder} = leveled_bookie:book_returnfolder(Bookie, Q), @@ -442,12 +445,14 @@ index_compare(_Config) -> true = DL2_0 == [], true = length(DL2_1) > 100, - IdxSpc = {add, "idx2_bin", "zz999"}, - {TestObj, TestSpc} = testutil:generate_testobject(BucketBin, - term_to_binary("K9.Z"), - "Value1", - [IdxSpc], - [{"MDK1", "MDV1"}]), + IdxSpc = {add, <<"idx2_bin">>, <<"zz999">>}, + {TestObj, TestSpc} = + testutil:generate_testobject( + BucketBin, + term_to_binary("K9.Z"), + "Value1", + [IdxSpc], + [{"MDK1", "MDV1"}]), ok = testutil:book_riakput(Book2C, TestObj, TestSpc), testutil:check_forobject(Book2C, TestObj), @@ -457,25 +462,30 @@ index_compare(_Config) -> TicTacTree3_P3 = GetTicTacTreeFun(2, Book2D), % Merge the tree across the partitions - TicTacTree3_Joined = lists:foldl(fun leveled_tictac:merge_trees/2, - TicTacTree3_P1, - [TicTacTree3_P2, TicTacTree3_P3]), + TicTacTree3_Joined = + lists:foldl( + fun leveled_tictac:merge_trees/2, + TicTacTree3_P1, + [TicTacTree3_P2, TicTacTree3_P3]), % Find all keys index, and then just the last key IdxQ1 = {index_query, BucketBin, {fun testutil:foldkeysfun/3, []}, - {"idx2_bin", "zz", "zz|"}, + {<<"idx2_bin">>, <<"zz">>, <<"zz|">>}, {true, undefined}}, {async, IdxFolder1} = leveled_bookie:book_returnfolder(Book2C, IdxQ1), true = IdxFolder1() >= 1, - DL_3to2B = leveled_tictac:find_dirtyleaves(TicTacTree2_P1, - TicTacTree3_P1), - DL_3to2C = leveled_tictac:find_dirtyleaves(TicTacTree2_P2, - TicTacTree3_P2), - DL_3to2D = leveled_tictac:find_dirtyleaves(TicTacTree2_P3, - TicTacTree3_P3), + DL_3to2B = + leveled_tictac:find_dirtyleaves( + TicTacTree2_P1, TicTacTree3_P1), + DL_3to2C = + leveled_tictac:find_dirtyleaves( + TicTacTree2_P2, TicTacTree3_P2), + DL_3to2D = + leveled_tictac:find_dirtyleaves( + TicTacTree2_P3, TicTacTree3_P3), io:format("Individual tree comparison found dirty leaves of ~w ~w ~w~n", [DL_3to2B, DL_3to2C, DL_3to2D]), @@ -509,7 +519,7 @@ index_compare(_Config) -> MismatchQ = {index_query, BucketBin, {FoldKeysIndexQFun, []}, - {"idx2_bin", "!", "|"}, + {<<"idx2_bin">>, <<"!">>, <<"|">>}, {true, undefined}}, {async, MMFldr_2A} = leveled_bookie:book_returnfolder(Book2A, MismatchQ), {async, MMFldr_2B} = leveled_bookie:book_returnfolder(Book2B, MismatchQ), @@ -531,7 +541,7 @@ index_compare(_Config) -> io:format("Differences between lists ~w~n", [Diffs]), % The actual difference is discovered - true = lists:member({"zz999", term_to_binary("K9.Z")}, Diffs), + true = lists:member({<<"zz999">>, term_to_binary("K9.Z")}, Diffs), % Without discovering too many others true = length(Diffs) < 20, From da092d01bc7229f3f2d5e2bf9e94fb47e11a258f Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Tue, 3 Sep 2024 16:34:41 +0100 Subject: [PATCH 2/2] Make tictac more efficient by making level1 a map (#441) * Make tictac more efficient by making level1 a map Pre-change (1M keys, tree size large): Generating Keys took 2513 milliseconds Memory footprint [{total,356732576},{processes,334051328},{processes_used,334044488},{system,22681248},{atom,540873},{atom_used,524383},{binary,1015120},{code,9692859},{ets,721496}] Generating new tree took 1 milliseconds Loading tree took 27967 milliseconds Memory footprint [{total,36733040},{processes,8875472},{processes_used,8875048},{system,27857568},{atom,540873},{atom_used,524449},{binary,6236480},{code,9692859},{ets,721496}] Exporting tree took 434 milliseconds Importing tree took 100 milliseconds Memory footprint [{total,155941512},{processes,123734808},{processes_used,123734384},{system,32206704},{atom,540873},{atom_used,524449},{binary,10401144},{code,9692859},{ets,721496}] Garbage collect Memory footprint [{total,39660504},{processes,8257520},{processes_used,8256968},{system,31402984},{atom,540873},{atom_used,524449},{binary,9781760},{code,9692859},{ets,721496}] Post change: Generating Keys took 2416 milliseconds Memory footprint [{total,284678120},{processes,258349528},{processes_used,257758568},{system,26328592},{atom,893161},{atom_used,878150},{binary,1013880},{code,11770188},{ets,774224}] Generating new tree took 0 milliseconds Loading tree took 2072 milliseconds Memory footprint [{total,49957448},{processes,17244856},{processes_used,16653896},{system,32712592},{atom,893161},{atom_used,878216},{binary,7397496},{code,11770188},{ets,774224}] Exporting tree took 448 milliseconds Importing tree took 108 milliseconds Memory footprint [{total,46504880},{processes,11197344},{processes_used,10606384},{system,35307536},{atom,893161},{atom_used,878216},{binary,9992112},{code,11770188},{ets,774224}] Garbage collect Memory footprint [{total,47394048},{processes,12223608},{processes_used,11632520},{system,35170440},{atom,893161},{atom_used,878216},{binary,9855008},{code,11770188},{ets,774224}] * Tidy-up * Add type * Remove ++ requiring copy of Acc Rely on mechanism producing a sorted result, not sorting * Update src/leveled_tictac.erl Co-authored-by: Thomas Arts * Update following review --------- Co-authored-by: Thomas Arts --- src/leveled_penciller.erl | 8 +- src/leveled_tictac.erl | 291 ++++++++++++++++++++++++++++++-------- 2 files changed, 230 insertions(+), 69 deletions(-) diff --git a/src/leveled_penciller.erl b/src/leveled_penciller.erl index eeb358fa..4c9e7cbb 100644 --- a/src/leveled_penciller.erl +++ b/src/leveled_penciller.erl @@ -159,7 +159,7 @@ -behaviour(gen_server). --include("include/leveled.hrl"). +-include("leveled.hrl"). -export([ init/1, @@ -207,23 +207,17 @@ -export([clean_testdir/1]). -endif. --define(MAX_WORK_WAIT, 300). -define(MANIFEST_FP, "ledger_manifest"). -define(FILES_FP, "ledger_files"). --define(CURRENT_FILEX, "crr"). --define(PENDING_FILEX, "pnd"). -define(SST_FILEX, ".sst"). -define(ARCHIVE_FILEX, ".bak"). -define(SUPER_MAX_TABLE_SIZE, 40000). --define(PROMPT_WAIT_ONL0, 5). -define(WORKQUEUE_BACKLOG_TOLERANCE, 4). -define(COIN_SIDECOUNT, 4). -define(SLOW_FETCH, 500000). % Log a very slow fetch - longer than 500ms -define(FOLD_SCANWIDTH, 32). -define(ITERATOR_SCANWIDTH, 4). -define(ITERATOR_MINSCANWIDTH, 1). --define(TIMING_SAMPLECOUNTDOWN, 10000). --define(TIMING_SAMPLESIZE, 100). -define(SHUTDOWN_LOOPS, 10). -define(SHUTDOWN_PAUSE, 10000). % How long to wait for snapshots to be released on shutdown diff --git a/src/leveled_tictac.erl b/src/leveled_tictac.erl index e84c46a3..d4b07985 100644 --- a/src/leveled_tictac.erl +++ b/src/leveled_tictac.erl @@ -52,8 +52,6 @@ -module(leveled_tictac). --include("include/leveled.hrl"). - -export([ new_tree/1, new_tree/2, @@ -102,10 +100,12 @@ size :: tree_size(), width :: integer(), segment_count :: integer(), - level1 :: binary(), - level2 :: any() % an array - but OTP compatibility + level1 :: level1_map(), + level2 :: array:array() }). +-type level1_map() :: #{non_neg_integer() => binary()}. + -type tictactree() :: #tictactree{}. -type segment48() :: @@ -114,6 +114,11 @@ {binary(), integer(), integer(), integer(), binary()}. -type tree_size() :: xxsmall|xsmall|small|medium|large|xlarge. +-type bin_extract_fun() + :: + fun((term(), term()) -> + {binary(), binary()|{is_hash, non_neg_integer()}} + ). -export_type([tictactree/0, segment48/0, tree_size/0]). @@ -137,7 +142,7 @@ new_tree(TreeID) -> new_tree(TreeID, Size) -> Width = get_size(Size), Lv1Width = Width * ?HASH_SIZE * 8, - Lv1Init = <<0:Lv1Width/integer>>, + Lv1Init = to_level1_map(<<0:Lv1Width/integer>>), Lv2Init = array:new([{size, Width}, {default, ?EMPTY}]), #tictactree{treeID = TreeID, size = Size, @@ -159,9 +164,16 @@ export_tree(Tree) -> L2 = lists:foldl(EncodeL2Fun, [], lists:seq(0, Tree#tictactree.width - 1)), {struct, - [{<<"level1">>, base64:encode_to_string(Tree#tictactree.level1)}, - {<<"level2">>, {struct, lists:reverse(L2)}} - ]}. + [{<<"level1">>, + base64:encode_to_string( + from_level1_map(Tree#tictactree.level1) + ) + }, + {<<"level2">>, + {struct, lists:reverse(L2)} + } + ] + }. -spec import_tree({struct, list()}) -> tictactree(). %% @doc @@ -174,8 +186,9 @@ import_tree(ExportedTree) -> Sizes = lists:map(fun(SizeTag) -> {SizeTag, get_size(SizeTag)} end, ?VALID_SIZES), Width = byte_size(L1Bin) div ?HASH_SIZE, - {Size, Width} = lists:keyfind(Width, 2, Sizes), - Width = get_size(Size), + {Size, _Width} = lists:keyfind(Width, 2, Sizes), + %% assert that side is indeed the provided width + true = get_size(Size) == Width, Lv2Init = array:new([{size, Width}]), FoldFun = fun({X, EncodedL2SegBin}, L2Array) -> @@ -183,15 +196,18 @@ import_tree(ExportedTree) -> array:set(binary_to_integer(X), L2SegBin, L2Array) end, Lv2 = lists:foldl(FoldFun, Lv2Init, L2List), - #tictactree{treeID = import, - size = Size, - width = Width, - segment_count = Width * ?L2_CHUNKSIZE, - level1 = L1Bin, - level2 = Lv2}. - - --spec add_kv(tictactree(), term(), term(), fun()) -> tictactree(). + garbage_collect(), + #tictactree{ + treeID = import, + size = Size, + width = Width, + segment_count = Width * ?L2_CHUNKSIZE, + level1 = to_level1_map(L1Bin), + level2 = Lv2 + }. + + +-spec add_kv(tictactree(), term(), term(), bin_extract_fun()) -> tictactree(). %% @doc %% Add a Key and value to a tictactree using the BinExtractFun to extract a %% binary from the Key and value from which to generate the hash. The @@ -200,8 +216,9 @@ import_tree(ExportedTree) -> add_kv(TicTacTree, Key, Value, BinExtractFun) -> add_kv(TicTacTree, Key, Value, BinExtractFun, false). --spec add_kv(tictactree(), term(), term(), fun(), boolean()) - -> tictactree()|{tictactree(), integer()}. +-spec add_kv( + tictactree(), term(), term(), bin_extract_fun(), boolean()) + -> tictactree()|{tictactree(), integer()}. %% @doc %% add_kv with ability to return segment ID of Key added add_kv(TicTacTree, Key, Value, BinExtractFun, ReturnSegment) -> @@ -215,14 +232,15 @@ add_kv(TicTacTree, Key, Value, BinExtractFun, ReturnSegment) -> SegLeaf2Upd = SegLeaf2 bxor SegChangeHash, SegLeaf1Upd = SegLeaf1 bxor SegChangeHash, + UpdatedTree = + replace_segment( + SegLeaf1Upd, SegLeaf2Upd, L1Extract, L2Extract, TicTacTree + ), case ReturnSegment of true -> - {replace_segment(SegLeaf1Upd, SegLeaf2Upd, - L1Extract, L2Extract, TicTacTree), - Segment}; + {UpdatedTree, Segment}; false -> - replace_segment(SegLeaf1Upd, SegLeaf2Upd, - L1Extract, L2Extract, TicTacTree) + UpdatedTree end. -spec alter_segment(integer(), integer(), tictactree()) -> tictactree(). @@ -241,8 +259,9 @@ alter_segment(Segment, Hash, Tree) -> %% Returns a list of segment IDs which hold differences between the state %% represented by the two trees. find_dirtyleaves(SrcTree, SnkTree) -> - Size = SrcTree#tictactree.size, - Size = SnkTree#tictactree.size, + SizeSrc = SrcTree#tictactree.size, + SizeSnk = SnkTree#tictactree.size, + true = SizeSrc == SizeSnk, IdxList = find_dirtysegments(fetch_root(SrcTree), fetch_root(SnkTree)), SrcLeaves = fetch_leaves(SrcTree, IdxList), @@ -250,12 +269,19 @@ find_dirtyleaves(SrcTree, SnkTree) -> FoldFun = fun(Idx, Acc) -> - {Idx, SrcLeaf} = lists:keyfind(Idx, 1, SrcLeaves), - {Idx, SnkLeaf} = lists:keyfind(Idx, 1, SnkLeaves), + {_, SrcLeaf} = lists:keyfind(Idx, 1, SrcLeaves), + {_, SnkLeaf} = lists:keyfind(Idx, 1, SnkLeaves), L2IdxList = segmentcompare(SrcLeaf, SnkLeaf), - Acc ++ lists:map(fun(X) -> X + Idx * ?L2_CHUNKSIZE end, L2IdxList) + lists:foldl( + fun(X, InnerAcc) -> + SegID = X + Idx * ?L2_CHUNKSIZE, + [SegID|InnerAcc] + end, + Acc, + L2IdxList) end, - lists:sort(lists:foldl(FoldFun, [], IdxList)). + %% Output not sorted, as sorted by the design of the construction process + lists:foldl(FoldFun, [], IdxList). -spec find_dirtysegments(binary(), binary()) -> list(integer()). %% @doc @@ -268,7 +294,7 @@ find_dirtysegments(SrcBin, SinkBin) -> %% @doc %% Return the level1 binary for a tree. fetch_root(TicTacTree) -> - TicTacTree#tictactree.level1. + from_level1_map(TicTacTree#tictactree.level1). -spec fetch_leaves(tictactree(), list(integer())) -> list(). %% @doc @@ -303,15 +329,21 @@ merge_trees(TreeA, TreeB) -> NewLevel2 = merge_binaries(L2A, L2B), array:set(SQN, NewLevel2, MergeL2) end, - NewLevel2 = lists:foldl(MergeFun, - MergedTree#tictactree.level2, - lists:seq(0, MergedTree#tictactree.width - 1)), + NewLevel2 = + lists:foldl( + MergeFun, + MergedTree#tictactree.level2, + lists:seq(0, MergedTree#tictactree.width - 1) + ), - MergedTree#tictactree{level1 = NewLevel1, level2 = NewLevel2}. - --spec get_segment(integer(), - integer()|xxsmall|xsmall|small|medium|large|xlarge) -> - integer(). + MergedTree#tictactree{ + level1 = to_level1_map(NewLevel1), + level2 = NewLevel2 + }. + +-spec get_segment( + integer(), + integer()|xxsmall|xsmall|small|medium|large|xlarge) -> integer(). %% @doc %% Return the segment ID for a Key. Can pass the tree size or the actual %% segment count derived from the size @@ -339,8 +371,8 @@ tictac_hash(BinKey, Val) when is_binary(BinKey) -> end, {HashKeyToSeg, AltHashKey bxor HashVal}. --spec keyto_doublesegment32(binary()) - -> {non_neg_integer(), non_neg_integer()}. +-spec keyto_doublesegment32( + binary()) -> {non_neg_integer(), non_neg_integer()}. %% @doc %% Used in tictac_hash/2 to provide an alternative hash of the key to bxor with %% the value, as well as the segment hash to locate the leaf of the tree to be @@ -372,8 +404,8 @@ keyto_segment48(BinKey) -> _Rest/binary>> = crypto:hash(md5, BinKey), {segment_hash, SegmentID, ExtraHash, AltHash}. --spec generate_segmentfilter_list(list(integer()), tree_size()) - -> false|list(integer()). +-spec generate_segmentfilter_list( + list(integer()), tree_size()) -> false|list(integer()). %% @doc %% Cannot accelerate segment listing for trees below certain sizes, so check %% the creation of segment filter lists with this function @@ -398,8 +430,8 @@ generate_segmentfilter_list(SegmentList, Size) -> SegmentList end. --spec adjust_segmentmatch_list(list(integer()), tree_size(), tree_size()) - -> list(integer()). +-spec adjust_segmentmatch_list( + list(integer()), tree_size(), tree_size()) -> list(integer()). %% @doc %% If we have dirty segments discovered by comparing trees of size CompareSize, %% and we want to see if it matches a segment for a key which was created for a @@ -441,8 +473,8 @@ adjust_segmentmatch_list(SegmentList, CompareSize, StoreSize) -> end. --spec match_segment({integer(), tree_size()}, {integer(), tree_size()}) - -> boolean(). +-spec match_segment( + {integer(), tree_size()}, {integer(), tree_size()}) -> boolean(). %% @doc %% Does segment A match segment B - given that segment A was generated using %% Tree size A and segment B was generated using Tree Size B @@ -462,8 +494,29 @@ join_segment(BranchID, LeafID) -> %%% Internal functions %%%============================================================================ --spec extract_segment(integer(), tictactree()) -> - {integer(), integer(), tree_extract(), tree_extract()}. +-spec to_level1_map(binary()) -> level1_map(). +to_level1_map(L1Bin) -> + to_level1_map_loop(L1Bin, maps:new(), 0). + +to_level1_map_loop(<<>>, L1MapAcc, _Idx) -> + L1MapAcc; +to_level1_map_loop(<>, L1MapAcc, Idx) -> + to_level1_map_loop(Rest, maps:put(Idx, Slice, L1MapAcc), Idx + 1). + + +-spec from_level1_map(level1_map()) -> binary(). +from_level1_map(L1Map) -> + lists:foldl( + fun(I, Acc) -> + <> + end, + <<>>, + lists:seq(0, maps:size(L1Map) - 1) + ). + +-spec extract_segment( + integer(), tictactree()) -> + {integer(), integer(), tree_extract(), tree_extract()}. %% @doc %% Extract the Level 1 and Level 2 slices from a tree to prepare an update extract_segment(Segment, TicTacTree) -> @@ -472,9 +525,10 @@ extract_segment(Segment, TicTacTree) -> Level1Pos = (Segment bsr ?L2_BITSIZE) band (TicTacTree#tictactree.width - 1), + Level1Slice = Level1Pos div 16, Level2BytePos = ?HASH_SIZE * Level2Pos, - Level1BytePos = ?HASH_SIZE * Level1Pos, + Level1BytePos = ?HASH_SIZE * (Level1Pos rem 16), Level2 = get_level2(TicTacTree, Level1Pos), @@ -484,7 +538,7 @@ extract_segment(Segment, TicTacTree) -> PostL2/binary>> = Level2, <> = TicTacTree#tictactree.level1, + PostL1/binary>> = maps:get(Level1Slice, TicTacTree#tictactree.level1), {SegLeaf1, SegLeaf2, @@ -492,25 +546,26 @@ extract_segment(Segment, TicTacTree) -> {PreL2, Level2BytePos, Level2Pos, HashIntLength, PostL2}}. --spec replace_segment(integer(), integer(), - tree_extract(), tree_extract(), - tictactree()) -> tictactree(). +-spec replace_segment( + integer(), integer(), tree_extract(), tree_extract(), tictactree()) -> + tictactree(). %% @doc %% Replace a slice of a tree replace_segment(L1Hash, L2Hash, L1Extract, L2Extract, TicTacTree) -> {PreL1, Level1BytePos, Level1Pos, HashIntLength, PostL1} = L1Extract, {PreL2, Level2BytePos, _Level2Pos, HashIntLength, PostL2} = L2Extract, + Level1Slice = Level1Pos div 16, + Level1Upd = <>, Level2Upd = <>, - TicTacTree#tictactree{level1 = Level1Upd, - level2 = array:set(Level1Pos, - Level2Upd, - TicTacTree#tictactree.level2)}. + TicTacTree#tictactree{ + level1 = maps:put(Level1Slice, Level1Upd, TicTacTree#tictactree.level1), + level2 = array:set(Level1Pos, Level2Upd, TicTacTree#tictactree.level2)}. get_level2(TicTacTree, L1Pos) -> case array:get(L1Pos, TicTacTree#tictactree.level2) of @@ -553,7 +608,7 @@ segmentcompare(SrcBin, SnkBin, Acc, Counter) -> <> = SrcBin, <> = SnkBin, case SrcHash of - SnkHash -> + H when H == SnkHash -> segmentcompare(SrcTail, SnkTail, Acc, Counter + 1); _ -> segmentcompare(SrcTail, SnkTail, [Counter|Acc], Counter + 1) @@ -576,7 +631,7 @@ merge_binaries(BinA, BinB) -> -include_lib("eunit/include/eunit.hrl"). checktree(TicTacTree) -> - checktree(TicTacTree#tictactree.level1, TicTacTree, 0). + checktree(from_level1_map(TicTacTree#tictactree.level1), TicTacTree, 0). checktree(<<>>, TicTacTree, Counter) -> true = TicTacTree#tictactree.width == Counter; @@ -656,6 +711,7 @@ simple_test_withsize(Size) -> DL0 = find_dirtyleaves(Tree1, Tree0), ?assertMatch(true, lists:member(GetSegFun(K1), DL0)), DL1 = find_dirtyleaves(Tree3, Tree1), + ?assertMatch(DL1, lists:sort(DL1)), ?assertMatch(true, lists:member(GetSegFun(K2), DL1)), ?assertMatch(true, lists:member(GetSegFun(K3), DL1)), ?assertMatch(false, lists:member(GetSegFun(K1), DL1)), @@ -665,6 +721,53 @@ simple_test_withsize(Size) -> ImpTree3 = import_tree(ExpTree3), ?assertMatch(DL1, find_dirtyleaves(ImpTree3, Tree1)). +dirtyleaves_sorted_test() -> + Tree0 = new_tree(test, large), + KVL1 = + lists:map( + fun(I) -> + {{o, to_bucket(I rem 8), to_key(I), null}, + {is_hash, erlang:phash2(integer_to_binary(I))}} + end, + lists:seq(1, 50000) + ), + KVL2 = + lists:map( + fun(I) -> + {{o, to_bucket(I rem 8), to_key(I), null}, + {is_hash, erlang:phash2(integer_to_binary(I))}} + end, + lists:seq(100000, 150000) + ), + Tree1 = + lists:foldl( + fun({K, V}, Acc) -> + add_kv(Acc, K, V, fun(K0, V0) -> {element(3, K0), V0} end) + end, + Tree0, + KVL1 + ), + Tree2 = + lists:foldl( + fun({K, V}, Acc) -> + add_kv(Acc, K, V, fun(K0, V0) -> {element(3, K0), V0} end) + end, + Tree0, + KVL2 + ), + SW0 = os:system_time(millisecond), + DL1 = find_dirtyleaves(Tree1, Tree2), + DL2 = find_dirtyleaves(Tree2, Tree1), + io:format( + user, + "Finding approx 100K dirty leaves twice in ~w milliseconds~n", + [os:system_time(millisecond) - SW0] + ), + ?assertMatch(DL1, lists:sort(DL1)), + ?assertMatch(DL2, lists:sort(DL2)), + ?assertMatch(DL1, DL2). + + merge_bysize_small_test() -> merge_test_withsize(small). @@ -870,6 +973,70 @@ find_dirtysegments_withanemptytree_test() -> ?assertMatch(ExpectedAnswer, find_dirtysegments(fetch_root(T3), <<>>)). +tictac_perf_test_() -> + {timeout, 120, fun tictac_perf_tester_multi/0}. + +tictac_perf_tester_multi() -> + tictac_perf_tester(1000000, large), + tictac_perf_tester(40000, small). + +tictac_perf_tester(KeyCount, TreeSize) -> + io:format(user, "Testing with Tree Size ~w~n", [TreeSize]), + io:format(user, "Generating ~w Keys and Hashes~n", [KeyCount]), + SW0 = os:system_time(millisecond), + KVL = + lists:map( + fun(I) -> + {{o, to_bucket(I rem 8), to_key(I), null}, + {is_hash, erlang:phash2(integer_to_binary(I))}} + end, + lists:seq(1, KeyCount) + ), + + SW1 = os:system_time(millisecond), + io:format(user, "Generating Keys took ~w milliseconds~n", [SW1 - SW0]), + + Tree = new_tree(test, TreeSize), + log_memory_footprint(), + + SW2 = os:system_time(millisecond), + io:format(user, "Generating new tree took ~w milliseconds~n", [SW2 - SW1]), + + UpdTree = + lists:foldl( + fun({K, V}, Acc) -> + add_kv(Acc, K, V, fun(K0, V0) -> {element(3, K0), V0} end) + end, + Tree, + KVL + ), + + SW3 = os:system_time(millisecond), + io:format(user, "Loading tree took ~w milliseconds~n", [SW3 - SW2]), + log_memory_footprint(), + + ExportedTree = export_tree(UpdTree), + + SW4 = os:system_time(millisecond), + io:format(user, "Exporting tree took ~w milliseconds~n", [SW4 - SW3]), + + ImportedTree = import_tree(ExportedTree), + + SW5 = os:system_time(millisecond), + io:format(user, "Importing tree took ~w milliseconds~n", [SW5 - SW4]), + + log_memory_footprint(), + + ?assertMatch([], find_dirtyleaves(UpdTree, ImportedTree)). + +to_key(N) -> + list_to_binary(io_lib:format("K~8..0B", [N])). + +to_bucket(N) -> + list_to_binary(io_lib:format("B~8..0B", [N])). + +log_memory_footprint() -> + io:format(user, "Memory footprint ~0p~n", [erlang:memory()]). -endif.