From d45356a4f748ce06cd62fb0221f50658ccfd8588 Mon Sep 17 00:00:00 2001
From: Martin Sumner <martin.sumner@adaptip.co.uk>
Date: Mon, 15 Jul 2024 20:49:21 +0100
Subject: [PATCH 1/2] Extend perf_SUITE (#434)

* Extend perf_SUITE

This is v6 of the perf_SUITE tests.  The test adds a complex index entry to every object, and then adds a new test phase to test regex queries.

There are three profiles added so the full, mini and profiling versions of perf_SUITE can be run without having to edit the file itself:

e.g. ./rebar3 as perf_mini do ct --suite=test/end_to_end/perf_SUITE

When testing as `perf_prof` summarised versions of the eprof results are now printed to screen.

The volume of keys within the full test suite has been dropped ... just to make life easier so that test run times are not excessively increase by the new features.

* Load chunk in spawned processes

Assume to make the job of gs easier - name makes a massive difference to load time in OTP 24.

* Correctly account for pause

alos try and improve test stability by increasing pause

* Add microstate accounting to profile

* Add memory tracking during test phases

Identify and log out memory usage by test phase

* Use macros instead (#437)

* Don't print memory to screen in standard ct test

---------

Co-authored-by: Thomas Arts <thomas.arts@quviq.com>
---
 rebar.config                       |   5 +-
 test/end_to_end/iterator_SUITE.erl | 380 ++++++++++----------
 test/end_to_end/perf_SUITE.erl     | 554 +++++++++++++++++++++++------
 test/end_to_end/recovery_SUITE.erl |  42 ++-
 test/end_to_end/testutil.erl       |  87 +++--
 test/end_to_end/tictac_SUITE.erl   |  52 +--
 6 files changed, 749 insertions(+), 371 deletions(-)

diff --git a/rebar.config b/rebar.config
index 43c930b3..f0b17482 100644
--- a/rebar.config
+++ b/rebar.config
@@ -22,7 +22,10 @@
     {plugins, [rebar_eqc]}
    ]},
   {test, [{extra_src_dirs, ["test/end_to_end", "test/property"]}
-   ]}
+   ]},
+  {perf_full, [{erl_opts, [{d, performance, riak_fullperf}]}]},
+  {perf_mini, [{erl_opts, [{d, performance, riak_miniperf}]}]},
+  {perf_prof, [{erl_opts, [{d, performance, riak_profileperf}]}]}
  ]}.
 
 {deps, [
diff --git a/test/end_to_end/iterator_SUITE.erl b/test/end_to_end/iterator_SUITE.erl
index 217d209f..ef5efdf0 100644
--- a/test/end_to_end/iterator_SUITE.erl
+++ b/test/end_to_end/iterator_SUITE.erl
@@ -213,11 +213,12 @@ breaking_folds(_Config) ->
     % Find all keys index, and then same again but stop at a midpoint using a
     % throw
     {async, IdxFolder} =
-        leveled_bookie:book_indexfold(Bookie1,
-                                        list_to_binary("Bucket"), 
-                                        {fun testutil:foldkeysfun/3, []}, 
-                                        {"idx1_bin", "#", "|"},
-                                        {true, undefined}),
+        leveled_bookie:book_indexfold(
+            Bookie1,
+            list_to_binary("Bucket"), 
+            {fun testutil:foldkeysfun/3, []}, 
+            {<<"idx1_bin">>, <<"#">>, <<"|">>},
+            {true, undefined}),
     KeyList1 = lists:reverse(IdxFolder()),
     io:format("Index fold with result size ~w~n", [length(KeyList1)]),
     true = KeyCount == length(KeyList1),
@@ -235,11 +236,12 @@ breaking_folds(_Config) ->
             end
         end,
     {async, IdxFolderToMidK} =
-        leveled_bookie:book_indexfold(Bookie1,
-                                        list_to_binary("Bucket"), 
-                                        {FoldKeyThrowFun, []}, 
-                                        {"idx1_bin", "#", "|"},
-                                        {true, undefined}),
+        leveled_bookie:book_indexfold(
+            Bookie1,
+            list_to_binary("Bucket"), 
+            {FoldKeyThrowFun, []}, 
+            {<<"idx1_bin">>, <<"#">>, <<"|">>},
+            {true, undefined}),
     CatchingFold =
         fun(AsyncFolder) ->
             try
@@ -261,10 +263,8 @@ breaking_folds(_Config) ->
             [{K, Size}|Acc]
         end,
     {async, HeadFolder} = 
-        leveled_bookie:book_headfold(Bookie1,
-                                        ?RIAK_TAG, 
-                                        {HeadFoldFun, []}, 
-                                        true, true, false),
+        leveled_bookie:book_headfold(
+            Bookie1, ?RIAK_TAG,  {HeadFoldFun, []}, true, true, false),
     KeySizeList1 = lists:reverse(HeadFolder()),
     io:format("Head fold with result size ~w~n", [length(KeySizeList1)]),
     true = KeyCount == length(KeySizeList1),
@@ -472,11 +472,9 @@ small_load_with2i(_Config) ->
     testutil:check_forobject(Bookie1, TestObject),
     ObjectGen = testutil:get_compressiblevalue_andinteger(),
     IndexGen = testutil:get_randomindexes_generator(8),
-    ObjL1 = testutil:generate_objects(10000,
-                                        uuid,
-                                        [],
-                                        ObjectGen,
-                                        IndexGen),
+    ObjL1 =
+        testutil:generate_objects(
+            10000, uuid, [], ObjectGen, IndexGen),
     testutil:riakload(Bookie1, ObjL1),
     ChkList1 = lists:sublist(lists:sort(ObjL1), 100),
     testutil:check_forlist(Bookie1, ChkList1),
@@ -486,7 +484,7 @@ small_load_with2i(_Config) ->
     IdxQ1 = {index_query,
                 "Bucket",
                 {fun testutil:foldkeysfun/3, []},
-                {"idx1_bin", "#", "|"},
+                {<<"idx1_bin">>, <<"#">>, <<"|">>},
                 {true, undefined}},
     {async, IdxFolder} = leveled_bookie:book_returnfolder(Bookie1, IdxQ1),
     KeyList1 = lists:usort(IdxFolder()),
@@ -495,7 +493,7 @@ small_load_with2i(_Config) ->
     IdxQ2 = {index_query,
                 {"Bucket", LastKey},
                 {fun testutil:foldkeysfun/3, []},
-                {"idx1_bin", LastTerm, "|"},
+                {<<"idx1_bin">>, LastTerm, <<"|">>},
                 {false, undefined}},
     {async, IdxFolderLK} = leveled_bookie:book_returnfolder(Bookie1, IdxQ2),
     KeyList2 = lists:usort(IdxFolderLK()),
@@ -530,12 +528,14 @@ small_load_with2i(_Config) ->
                                                       {FoldObjectsFun, []},
                                                       false),
     KeyHashList2 = HTreeF2(),
-    {async, HTreeF3} = leveled_bookie:book_objectfold(Bookie1,
-                                                      ?RIAK_TAG,
-                                                      "Bucket",
-                                                      {"idx1_bin", "#", "|"},
-                                                      {FoldObjectsFun, []},
-                                                      false),
+    {async, HTreeF3} =
+        leveled_bookie:book_objectfold(
+            Bookie1,
+            ?RIAK_TAG,
+            "Bucket",
+            {<<"idx1_bin">>, <<"#">>, <<"|">>},
+            {FoldObjectsFun, []},
+            false),
     KeyHashList3 = HTreeF3(),
     true = 9901 == length(KeyHashList1), % also includes the test object
     true = 9900 == length(KeyHashList2),
@@ -585,96 +585,86 @@ small_load_with2i(_Config) ->
 
 query_count(_Config) ->
     RootPath = testutil:reset_filestructure(),
-    {ok, Book1} = leveled_bookie:book_start(RootPath,
-                                            2000,
-                                            50000000,
-                                            testutil:sync_strategy()),
+    {ok, Book1} =
+        leveled_bookie:book_start(
+            RootPath, 2000, 50000000, testutil:sync_strategy()),
     BucketBin = list_to_binary("Bucket"),
-    {TestObject, TestSpec} = testutil:generate_testobject(BucketBin,
-                                                            term_to_binary("Key1"),
-                                                            "Value1",
-                                                            [],
-                                                            [{"MDK1", "MDV1"}]),
+    {TestObject, TestSpec} =
+        testutil:generate_testobject(
+            BucketBin, term_to_binary("Key1"), "Value1", [], [{"MDK1", "MDV1"}]),
     ok = testutil:book_riakput(Book1, TestObject, TestSpec),
     testutil:check_forobject(Book1, TestObject),
     testutil:check_formissingobject(Book1, "Bucket1", "Key2"),
     testutil:check_forobject(Book1, TestObject),
-    lists:foreach(fun(_X) ->
-                        V = testutil:get_compressiblevalue(),
-                        Indexes = testutil:get_randomindexes_generator(8),
-                        SW = os:timestamp(),
-                        ObjL1 = testutil:generate_objects(10000,
-                                                            binary_uuid,
-                                                            [],
-                                                            V,
-                                                            Indexes),
-                        testutil:riakload(Book1, ObjL1),
-                        io:format("Put of 10000 objects with 8 index entries "
-                                        ++
-                                        "each completed in ~w microseconds~n",
-                                    [timer:now_diff(os:timestamp(), SW)])
-                        end,
-                        lists:seq(1, 8)),
+    lists:foreach(
+        fun(_X) ->
+            V = testutil:get_compressiblevalue(),
+            Indexes = testutil:get_randomindexes_generator(8),
+            SW = os:timestamp(),
+            ObjL1 = testutil:generate_objects(10000,
+                                                binary_uuid,
+                                                [],
+                                                V,
+                                                Indexes),
+            testutil:riakload(Book1, ObjL1),
+            io:format(
+                "Put of 10000 objects with 8 index entries "
+                "each completed in ~w microseconds~n",
+                [timer:now_diff(os:timestamp(), SW)])
+        end,
+        lists:seq(1, 8)),
     testutil:check_forobject(Book1, TestObject),
-    Total = lists:foldl(fun(X, Acc) ->
-                                IdxF = "idx" ++ integer_to_list(X) ++ "_bin",
-                                T = count_termsonindex(BucketBin,
-                                                        IdxF,
-                                                        Book1,
-                                                        ?KEY_ONLY),
-                                io:format("~w terms found on index ~s~n",
-                                            [T, IdxF]),
-                                Acc + T
-                                end,
-                            0,
-                            lists:seq(1, 8)),
-    ok = case Total of
-                640000 ->
-                    ok
+    Total =
+        lists:foldl(
+            fun(X, Acc) ->
+                IdxF = "idx" ++ integer_to_list(X) ++ "_bin",
+                T =
+                    count_termsonindex(
+                        BucketBin, list_to_binary(IdxF), Book1, ?KEY_ONLY),
+                io:format("~w terms found on index ~s~n", [T, IdxF]),
+                Acc + T
             end,
-    Index1Count = count_termsonindex(BucketBin,
-                                        "idx1_bin",
-                                        Book1,
-                                        ?KEY_ONLY),
+            0,
+            lists:seq(1, 8)),
+    true = Total == 640000,
+    Index1Count =
+        count_termsonindex(
+            BucketBin, <<"idx1_bin">>, Book1, ?KEY_ONLY),
     ok = leveled_bookie:book_close(Book1),
-    {ok, Book2} = leveled_bookie:book_start(RootPath,
-                                            1000,
-                                            50000000,
-                                            testutil:sync_strategy()),
-    Index1Count = count_termsonindex(BucketBin,
-                                        "idx1_bin",
-                                        Book2,
-                                        ?KEY_ONLY),
+    {ok, Book2} =
+        leveled_bookie:book_start(
+            RootPath, 1000, 50000000, testutil:sync_strategy()),
+    Index1Count =
+        count_termsonindex(
+            BucketBin, <<"idx1_bin">>, Book2, ?KEY_ONLY),
     NameList = testutil:name_list(),
-    TotalNameByName = lists:foldl(fun({_X, Name}, Acc) ->
-                                        {ok, Regex} = re:compile("[0-9]+" ++
-                                                                    Name),
-                                        SW = os:timestamp(),
-                                        T = count_termsonindex(BucketBin,
-                                                                "idx1_bin",
-                                                                Book2,
-                                                                {false,
-                                                                    Regex}),
-                                        TD = timer:now_diff(os:timestamp(),
-                                                                SW),
-                                        io:format("~w terms found on " ++
-                                                    "index idx1 with a " ++
-                                                    "regex in ~w " ++
-                                                    "microseconds~n",
-                                                    [T, TD]),
-                                        Acc + T
-                                        end,
-                                    0,
-                                    NameList),
-    ok = case TotalNameByName of
-                Index1Count ->
-                    ok
+    TotalNameByName =
+        lists:foldl(
+            fun({_X, Name}, Acc) ->
+                {ok, Regex} =
+                    re:compile("[0-9]+" ++ Name),
+                SW = os:timestamp(),
+                T =
+                    count_termsonindex(
+                        BucketBin,
+                        list_to_binary("idx1_bin"),
+                        Book2,
+                        {false, Regex}),
+                TD = timer:now_diff(os:timestamp(), SW),
+                io:format(
+                    "~w terms found on  index idx1 with a "
+                    "regex in ~w  microseconds~n",
+                    [T, TD]),
+                Acc + T
             end,
+            0,
+            NameList),
+    true = TotalNameByName == Index1Count,
     {ok, RegMia} = re:compile("[0-9]+Mia"),
     Query1 = {index_query,
                 BucketBin,
                 {fun testutil:foldkeysfun/3, []},
-                {"idx2_bin", "2000", "2000|"},
+                {<<"idx2_bin">>, <<"2000">>, <<"2000|">>},
                 {false, RegMia}},
     {async,
         Mia2KFolder1} = leveled_bookie:book_returnfolder(Book2, Query1),
@@ -682,7 +672,7 @@ query_count(_Config) ->
     Query2 = {index_query,
                 BucketBin,
                 {fun testutil:foldkeysfun/3, []},
-                {"idx2_bin", "2000", "2001"},
+                {<<"idx2_bin">>, <<"2000">>, <<"2001">>},
                 {true, undefined}},
     {async,
         Mia2KFolder2} = leveled_bookie:book_returnfolder(Book2, Query2),
@@ -705,7 +695,7 @@ query_count(_Config) ->
     Query3 = {index_query,
                 BucketBin,
                 {fun testutil:foldkeysfun/3, []},
-                {"idx2_bin", "1980", "2100"},
+                {<<"idx2_bin">>, <<"1980">>, <<"2100">>},
                 {false, RxMia2K}},
     {async,
         Mia2KFolder3} = leveled_bookie:book_returnfolder(Book2, Query3),
@@ -713,26 +703,26 @@ query_count(_Config) ->
     
     V9 = testutil:get_compressiblevalue(),
     Indexes9 = testutil:get_randomindexes_generator(8),
-    [{_RN, Obj9, Spc9}] = testutil:generate_objects(1,
-                                                    binary_uuid,
-                                                    [],
-                                                    V9,
-                                                    Indexes9),
+    [{_RN, Obj9, Spc9}] =
+        testutil:generate_objects(
+            1, binary_uuid, [], V9, Indexes9),
     ok = testutil:book_riakput(Book2, Obj9, Spc9),
-    R9 = lists:map(fun({add, IdxF, IdxT}) ->
-                        Q = {index_query,
-                                BucketBin,
-                                {fun testutil:foldkeysfun/3, []},
-                                {IdxF, IdxT, IdxT},
-                                ?KEY_ONLY},
-                        R = leveled_bookie:book_returnfolder(Book2, Q),
-                        {async, Fldr} = R,
-                        case length(Fldr()) of
-                            X when X > 0 ->
-                                {IdxF, IdxT, X}
-                        end
-                        end,
-                    Spc9),
+    R9 =
+        lists:map(
+            fun({add, IdxF, IdxT}) ->
+                Q = {index_query,
+                        BucketBin,
+                        {fun testutil:foldkeysfun/3, []},
+                        {IdxF, IdxT, IdxT},
+                        ?KEY_ONLY},
+                R = leveled_bookie:book_returnfolder(Book2, Q),
+                {async, Fldr} = R,
+                case length(Fldr()) of
+                    X when X > 0 ->
+                        {IdxF, IdxT, X}
+                end
+            end,
+            Spc9),
     Spc9Del = lists:map(fun({add, IdxF, IdxT}) -> {remove, IdxF, IdxT} end,
                         Spc9),
     ok = testutil:book_riakput(Book2, Obj9, Spc9Del),
@@ -751,44 +741,44 @@ query_count(_Config) ->
                         end,
                     R9),
     ok = leveled_bookie:book_close(Book2),
-    {ok, Book3} = leveled_bookie:book_start(RootPath,
-                                            2000,
-                                            50000000,
-                                            testutil:sync_strategy()),
-    lists:foreach(fun({IdxF, IdxT, X}) ->
-                        Q = {index_query,
-                                BucketBin,
-                                {fun testutil:foldkeysfun/3, []},
-                                {IdxF, IdxT, IdxT},
-                                ?KEY_ONLY},
-                        R = leveled_bookie:book_returnfolder(Book3, Q),
-                        {async, Fldr} = R,
-                        case length(Fldr()) of
-                            Y ->
-                                Y = X - 1
-                        end
-                        end,
-                    R9),
+    {ok, Book3} =
+        leveled_bookie:book_start(
+            RootPath, 2000, 50000000, testutil:sync_strategy()),
+    lists:foreach(
+        fun({IdxF, IdxT, X}) ->
+            Q = {index_query,
+                    BucketBin,
+                    {fun testutil:foldkeysfun/3, []},
+                    {IdxF, IdxT, IdxT},
+                    ?KEY_ONLY},
+            R = leveled_bookie:book_returnfolder(Book3, Q),
+            {async, Fldr} = R,
+            case length(Fldr()) of
+                Y ->
+                    Y = X - 1
+            end
+        end,
+        R9),
     ok = testutil:book_riakput(Book3, Obj9, Spc9),
     ok = leveled_bookie:book_close(Book3),
-    {ok, Book4} = leveled_bookie:book_start(RootPath,
-                                            2000,
-                                            50000000,
-                                            testutil:sync_strategy()),
-    lists:foreach(fun({IdxF, IdxT, X}) ->
-                        Q = {index_query,
-                                BucketBin,
-                                {fun testutil:foldkeysfun/3, []},
-                                {IdxF, IdxT, IdxT},
-                                ?KEY_ONLY},
-                        R = leveled_bookie:book_returnfolder(Book4, Q),
-                        {async, Fldr} = R,
-                        case length(Fldr()) of
-                            X ->
-                                ok
-                        end
-                        end,
-                    R9),
+    {ok, Book4} =
+        leveled_bookie:book_start(
+            RootPath, 2000, 50000000, testutil:sync_strategy()),
+    lists:foreach(
+        fun({IdxF, IdxT, X}) ->
+            Q = {index_query,
+                    BucketBin,
+                    {fun testutil:foldkeysfun/3, []},
+                    {IdxF, IdxT, IdxT},
+                    ?KEY_ONLY},
+            R = leveled_bookie:book_returnfolder(Book4, Q),
+            {async, Fldr} = R,
+            case length(Fldr()) of
+                X ->
+                    ok
+            end
+        end,
+        R9),
     testutil:check_forobject(Book4, TestObject),
     
     FoldBucketsFun = fun(B, Acc) -> sets:add_element(B, Acc) end,
@@ -803,24 +793,15 @@ query_count(_Config) ->
     
     true = sets:size(BucketSet1) == 1, 
     
-    ObjList10A = testutil:generate_objects(5000,
-                                            binary_uuid,
-                                            [],
-                                            V9,
-                                            Indexes9,
-                                            "BucketA"),
-    ObjList10B = testutil:generate_objects(5000,
-                                            binary_uuid,
-                                            [],
-                                            V9,
-                                            Indexes9,
-                                            "BucketB"),
-    ObjList10C = testutil:generate_objects(5000,
-                                            binary_uuid,
-                                            [],
-                                            V9,
-                                            Indexes9,
-                                            "BucketC"),
+    ObjList10A =
+        testutil:generate_objects(
+            5000, binary_uuid, [], V9, Indexes9, "BucketA"),
+    ObjList10B =
+        testutil:generate_objects(
+            5000, binary_uuid, [], V9, Indexes9, "BucketB"),
+    ObjList10C =
+        testutil:generate_objects(
+            5000, binary_uuid, [], V9, Indexes9, "BucketC"),
     testutil:riakload(Book4, ObjList10A),
     testutil:riakload(Book4, ObjList10B),
     testutil:riakload(Book4, ObjList10C),
@@ -847,31 +828,30 @@ query_count(_Config) ->
     ok = leveled_bookie:book_close(Book5),
     
     testutil:reset_filestructure().
-    
 
 
 count_termsonindex(Bucket, IdxField, Book, QType) ->
-    lists:foldl(fun(X, Acc) ->
-                        SW = os:timestamp(),
-                        ST = integer_to_list(X),
-                        ET = ST ++ "|",
-                        Q = {index_query,
-                                Bucket,
-                                {fun testutil:foldkeysfun/3, []},
-                                {IdxField, ST, ET},
-                                QType},
-                        R = leveled_bookie:book_returnfolder(Book, Q),
-                        {async, Folder} = R,
-                        Items = length(Folder()),
-                        io:format("2i query from term ~s on index ~s took " ++
-                                        "~w microseconds~n",
-                                    [ST,
-                                        IdxField,
-                                        timer:now_diff(os:timestamp(), SW)]),
-                        Acc + Items
-                        end,
-                    0,
-                    lists:seq(190, 221)).
+    lists:foldl(
+        fun(X, Acc) ->
+            SW = os:timestamp(),
+            ST = list_to_binary(integer_to_list(X)),
+            Pipe = <<"|">>,
+            ET = <<ST/binary, Pipe/binary>>,
+            Q = {index_query,
+                    Bucket,
+                    {fun testutil:foldkeysfun/3, []},
+                    {IdxField, ST, ET},
+                    QType},
+            R = leveled_bookie:book_returnfolder(Book, Q),
+            {async, Folder} = R,
+            Items = length(Folder()),
+            io:format(
+                "2i query from term ~s on index ~s took ~w microseconds~n",
+                [ST, IdxField, timer:now_diff(os:timestamp(), SW)]),
+            Acc + Items
+        end,
+        0,
+        lists:seq(190, 221)).
 
 multibucket_fold(_Config) ->
     RootPath = testutil:reset_filestructure(),
diff --git a/test/end_to_end/perf_SUITE.erl b/test/end_to_end/perf_SUITE.erl
index 9a3fabd1..98055830 100644
--- a/test/end_to_end/perf_SUITE.erl
+++ b/test/end_to_end/perf_SUITE.erl
@@ -3,10 +3,25 @@
 -define(INFO, info).
 -export([all/0, suite/0]).
 -export([
-    riak_ctperf/1, riak_fullperf/1, riak_profileperf/1
+    riak_ctperf/1, riak_fullperf/1, riak_profileperf/1, riak_miniperf/1
 ]).
 
-all() -> [riak_ctperf].
+-define(PEOPLE_INDEX, <<"people_bin">>).
+-define(MINI_QUERY_DIVISOR, 8).
+-define(RGEX_QUERY_DIVISOR, 32).
+
+-ifndef(performance).
+  -define(performance, riak_ctperf).
+-endif.
+all() -> [?performance].
+
+-if(?performance == riak_profileperf andalso ?OTP_RELEASE >= 24).
+   % Requires map functions from OTP 24
+   -define(ACCOUNTING, true).
+-else.
+   -define(ACCOUNTING, false).
+-endif.
+
 suite() -> [{timetrap, {hours, 16}}].
 
 riak_fullperf(_Config) ->
@@ -24,16 +39,16 @@ riak_fullperf(ObjSize, PM, LC) ->
     output_result(R5A),
     R5B = riak_load_tester(Bucket, 5000000, ObjSize, [], PM, LC),
     output_result(R5B),
-    R10 = riak_load_tester(Bucket, 10000000, ObjSize, [], PM, LC),
+    R10 = riak_load_tester(Bucket, 8000000, ObjSize, [], PM, LC),
     output_result(R10)
     .
 
 riak_profileperf(_Config) ->
     riak_load_tester(
         {<<"SensibleBucketTypeName">>, <<"SensibleBucketName0">>},
-        2000000,
+        1200000,
         2048,
-        [load, head, get, query, mini_query, full, guess, estimate, update],
+        [load, head, get, query, mini_query, regex_query, full, guess, estimate, update],
         zstd,
         as_store
     ).
@@ -42,12 +57,18 @@ riak_profileperf(_Config) ->
 riak_ctperf(_Config) ->
     riak_load_tester(<<"B0">>, 400000, 1024, [], native, as_store).
 
+riak_miniperf(_Config) ->
+    Bucket = {<<"SensibleBucketTypeName">>, <<"SensibleBucketName0">>},
+    R2A = riak_load_tester(Bucket, 2000000, 2048, [], zstd, as_store),
+    output_result(R2A).
+
 riak_load_tester(Bucket, KeyCount, ObjSize, ProfileList, PM, LC) ->
     ct:log(
         ?INFO,
         "Basic riak test with KeyCount ~w ObjSize ~w PressMethod ~w Ledger ~w",
         [KeyCount, ObjSize, PM, LC]
     ),
+
     IndexCount = 100000,
 
     GetFetches = KeyCount div 4,
@@ -74,6 +95,7 @@ riak_load_tester(Bucket, KeyCount, ObjSize, ProfileList, PM, LC) ->
                 IntIndex = "integer" ++ integer_to_list(ListID) ++ "_int",
                 BinIndex = "binary" ++ integer_to_list(ListID) ++ "_bin",
                 [{add, list_to_binary(IntIndex), RandInt},
+                {add, ?PEOPLE_INDEX, list_to_binary(random_people_index())},
                 {add, list_to_binary(IntIndex), RandInt + 1},
                 {add, list_to_binary(BinIndex), <<RandInt:32/integer>>},
                 {add, list_to_binary(BinIndex), <<(RandInt + 1):32/integer>>}]
@@ -82,6 +104,8 @@ riak_load_tester(Bucket, KeyCount, ObjSize, ProfileList, PM, LC) ->
 
     CountPerList = KeyCount div 10,
 
+    LoadMemoryTracker = memory_tracking(load, 1000),
+    LoadAccountant = accounting(load, 10000, ProfileList),
     TC4 = load_chunk(Bookie1, CountPerList, ObjSize, IndexGenFun, Bucket, 4),
     TC1 = load_chunk(Bookie1, CountPerList, ObjSize, IndexGenFun, Bucket, 1),
     TC9 = load_chunk(Bookie1, CountPerList, ObjSize, IndexGenFun, Bucket, 9),
@@ -92,6 +116,8 @@ riak_load_tester(Bucket, KeyCount, ObjSize, ProfileList, PM, LC) ->
     TC3 = load_chunk(Bookie1, CountPerList, ObjSize, IndexGenFun, Bucket, 3),
     TC7 = load_chunk(Bookie1, CountPerList, ObjSize, IndexGenFun, Bucket, 7),
     TC10 = load_chunk(Bookie1, CountPerList, ObjSize, IndexGenFun, Bucket, 10),
+    ok = stop_accounting(LoadAccountant),
+    {MT0, MP0, MB0} = stop_tracker(LoadMemoryTracker),
 
     ct:log(
         ?INFO,
@@ -104,20 +130,23 @@ riak_load_tester(Bucket, KeyCount, ObjSize, ProfileList, PM, LC) ->
         (TC1 + TC2 + TC3 + TC4 + TC5 + TC6 + TC7 + TC8 + TC9 + TC10) div 1000,
     ct:log(?INFO, "Total load time ~w ms", [TotalLoadTime]),
 
-    {MT0, MP0, MB0} = memory_usage(),
-
+    HeadMemoryTracker = memory_tracking(head, 1000),
+    HeadAccountant = accounting(head, 2000, ProfileList),
     TotalHeadTime = 
         random_fetches(head, Bookie1, Bucket, KeyCount, HeadFetches),
-    
-    {MT1, MP1, MB1} = memory_usage(),
+    ok = stop_accounting(HeadAccountant),
+    {MT1, MP1, MB1} = stop_tracker(HeadMemoryTracker),
 
+    GetMemoryTracker = memory_tracking(get, 1000),
+    GetAccountant = accounting(get, 3000, ProfileList),
     TotalGetTime = 
         random_fetches(get, Bookie1, Bucket, KeyCount, GetFetches),
+    ok = stop_accounting(GetAccountant),
+    {MT2, MP2, MB2} = stop_tracker(GetMemoryTracker),
 
-    {MT2, MP2, MB2} = memory_usage(),
-
+    QueryMemoryTracker = memory_tracking(query, 1000),
+    QueryAccountant = accounting(query, 1000, ProfileList),
     QuerySize = max(10, IndexCount div 1000),
-    MiniQuerySize = max(1, IndexCount div 50000),
     TotalQueryTime =
         random_queries(
             Bookie1,
@@ -126,6 +155,12 @@ riak_load_tester(Bucket, KeyCount, ObjSize, ProfileList, PM, LC) ->
             IndexCount,
             QuerySize,
             IndexesReturned),
+    ok = stop_accounting(QueryAccountant),
+    {MT3a, MP3a, MB3a} = stop_tracker(QueryMemoryTracker),
+
+    MiniQueryMemoryTracker = memory_tracking(mini_query, 1000),
+    MiniQueryAccountant = accounting(mini_query, 1000, ProfileList),
+    MiniQuerySize = max(1, IndexCount div 50000),
     TotalMiniQueryTime =
         random_queries(
             Bookie1,
@@ -133,18 +168,76 @@ riak_load_tester(Bucket, KeyCount, ObjSize, ProfileList, PM, LC) ->
             10,
             IndexCount,
             MiniQuerySize,
-            IndexesReturned div 8),
+            IndexesReturned div ?MINI_QUERY_DIVISOR),
+    ok = stop_accounting(MiniQueryAccountant),
+    {MT3b, MP3b, MB3b} = stop_tracker(MiniQueryMemoryTracker),
+
+    RegexQueryMemoryTracker = memory_tracking(regex_query, 1000),
+    RegexQueryAccountant = accounting(regex_query, 2000, ProfileList),
+    RegexQueryTime =
+        random_people_queries(
+            Bookie1,
+            Bucket,
+            IndexesReturned div ?RGEX_QUERY_DIVISOR),
+    ok = stop_accounting(RegexQueryAccountant),
+    {MT3c, MP3c, MB3c} = stop_tracker(RegexQueryMemoryTracker),
 
-    {MT3, MP3, MB3} = memory_usage(),
+    GuessMemoryTracker = memory_tracking(guess, 1000),
+    GuessAccountant = accounting(guess, 1000, ProfileList),
+    {GuessTime, GuessCount} =
+        lists:foldl(
+            fun(_I, {TSAcc, CountAcc}) ->
+                {TS, Count} = counter(Bookie1, guess),
+                {TSAcc + TS, CountAcc + Count}
+            end,
+            {0, 0},
+            lists:seq(1, 60)
+        ),
+    ok = stop_accounting(GuessAccountant),
+    {MT4a, MP4a, MB4a} = stop_tracker(GuessMemoryTracker),
 
-    {FullFoldTime, SegFoldTime} = size_estimate_summary(Bookie1),
+    EstimateMemoryTracker = memory_tracking(estimate, 1000),
+    EstimateAccountant = accounting(estimate, 1000, ProfileList),
+    {EstimateTime, EstimateCount} =
+        lists:foldl(
+            fun(_I, {TSAcc, CountAcc}) ->
+                {TS, Count} = counter(Bookie1, estimate),
+                {TSAcc + TS, CountAcc + Count}
+            end,
+            {0, 0},
+            lists:seq(1, 40)
+        ),
+    ok = stop_accounting(EstimateAccountant),
+    {MT4b, MP4b, MB4b} = stop_tracker(EstimateMemoryTracker),
 
-    {MT4, MP4, MB4} = memory_usage(),
+    SegFoldTime = (GuessTime + EstimateTime) div 1000,
+    
+    FullFoldMemoryTracker = memory_tracking(full, 1000),
+    FullFoldAccountant = accounting(full, 2000, ProfileList),
+    {FullFoldTime, FullFoldCount} =
+        lists:foldl(
+            fun(_I, {TSAcc, CountAcc}) ->
+                {TS, Count} = counter(Bookie1, full),
+                {TSAcc + TS, CountAcc + Count}
+            end,
+            {0, 0},
+            lists:seq(1, 5)
+        ),
+    ok = stop_accounting(FullFoldAccountant),
+    {MT5, MP5, MB5} = stop_tracker(FullFoldMemoryTracker),
 
+    ct:log(
+        info,
+        "Guess size ~w Estimate size ~w Actual size ~w",
+        [GuessCount div 60, EstimateCount div 40, FullFoldCount div 10]
+    ),
+
+    UpdateMemoryTracker = memory_tracking(update, 1000),
+    UpdateAccountant = accounting(update, 1000, ProfileList),
     TotalUpdateTime =
         rotate_chunk(Bookie1, <<"UpdBucket">>, KeyCount div 50, ObjSize),
-
-    {MT5, MP5, MB5} = memory_usage(),
+    ok = stop_accounting(UpdateAccountant),
+    {MT6, MP6, MB6} = stop_tracker(UpdateMemoryTracker),
 
     DiskSpace = lists:nth(1, string:tokens(os:cmd("du -sh riakLoad"), "\t")),
     ct:log(?INFO, "Disk space taken by test ~s", [DiskSpace]),
@@ -172,8 +265,9 @@ riak_load_tester(Bucket, KeyCount, ObjSize, ProfileList, PM, LC) ->
                     P ->
                         P
                 end,
+            io:format(user, "~nProfile ~p:~n", [P]),
             ProFun = profile_fun(P0, ProfileData),
-            profile_test(Bookie1, ProFun)
+            profile_test(Bookie1, ProFun, P)
         end,
         ProfileList),
 
@@ -183,21 +277,26 @@ riak_load_tester(Bucket, KeyCount, ObjSize, ProfileList, PM, LC) ->
     {KeyCount, ObjSize, {PM, LC},
         TotalLoadTime,
         TotalHeadTime, TotalGetTime,
-        TotalQueryTime, TotalMiniQueryTime, FullFoldTime, SegFoldTime,
+        TotalQueryTime, TotalMiniQueryTime, RegexQueryTime,
+        FullFoldTime div 1000, SegFoldTime,
         TotalUpdateTime,
         DiskSpace,
-        {(MT0 + MT1 + MT2 + MT3 + MT4 + MT5) div 6000000,
-            (MP0 + MP1 + MP2 + MP3 + MP4 + MP5) div 6000000,
-            (MB0 + MB1 + MB2 + MB3 + MB4 + MB5) div 6000000},
+        {(MT0 + MT1 + MT2 + MT3a + MT3b + MT3c + MT4a + MT4b + MT5 + MT6)
+                div 9,
+            (MP0 + MP1 + MP2 + MP3a + MP3b + MP3c + MP4a + MP4b + MP5 + MP6)
+                div 9,
+            (MB0 + MB1 + MB2 + MB3a + MB3b + MB3c + MB4a + MB4b + MB5 + MB6)
+                div 9},
         SSTPids, CDBPids}.
 
-
-profile_test(Bookie, ProfileFun) ->
+profile_test(Bookie, ProfileFun, P) ->
     {Inker, Pcl, SSTPids, PClerk, CDBPids, IClerk} = get_pids(Bookie),
     TestPid = self(),
     profile_app(
         [TestPid, Bookie, Inker, IClerk, Pcl, PClerk] ++ SSTPids ++ CDBPids,
-        ProfileFun).
+        ProfileFun,
+        P
+    ).
 
 get_pids(Bookie) ->
     {ok, Inker, Pcl} = leveled_bookie:book_returnactors(Bookie),
@@ -211,7 +310,8 @@ output_result(
     {KeyCount, ObjSize, PressMethod,
         TotalLoadTime,
         TotalHeadTime, TotalGetTime,
-        TotalQueryTime, TotalMiniQueryTime, TotalFullFoldTime, TotalSegFoldTime,
+        TotalQueryTime, TotalMiniQueryTime, RegexQueryTime,
+        TotalFullFoldTime, TotalSegFoldTime,
         TotalUpdateTime,
         DiskSpace,
         {TotalMemoryMB, ProcessMemoryMB, BinaryMemoryMB},
@@ -227,6 +327,7 @@ output_result(
         "TotalGetTime - ~w ms~n"
         "TotalQueryTime - ~w ms~n"
         "TotalMiniQueryTime - ~w ms~n"
+        "TotalRegexQueryTime - ~w ms~n"
         "TotalFullFoldTime - ~w ms~n"
         "TotalAAEFoldTime - ~w ms~n"
         "TotalUpdateTime - ~w ms~n"
@@ -236,7 +337,8 @@ output_result(
         "Closing count of CDB Files - ~w~n",
         [KeyCount, ObjSize, PressMethod,
             TotalLoadTime, TotalHeadTime, TotalGetTime,
-            TotalQueryTime, TotalMiniQueryTime, TotalFullFoldTime, TotalSegFoldTime,
+            TotalQueryTime, TotalMiniQueryTime, RegexQueryTime,
+            TotalFullFoldTime, TotalSegFoldTime,
             TotalUpdateTime,
             DiskSpace,
             TotalMemoryMB, ProcessMemoryMB, BinaryMemoryMB,
@@ -244,13 +346,12 @@ output_result(
     ).
 
 memory_usage() ->
-    garbage_collect(), % GC the test process
     MemoryUsage = erlang:memory(),
     {element(2, lists:keyfind(total, 1, MemoryUsage)),
         element(2, lists:keyfind(processes, 1, MemoryUsage)),
         element(2, lists:keyfind(binary, 1, MemoryUsage))}.
 
-profile_app(Pids, ProfiledFun) ->
+profile_app(Pids, ProfiledFun, P) ->
 
     eprof:start(),
     eprof:start_profiling(Pids),
@@ -258,40 +359,12 @@ profile_app(Pids, ProfiledFun) ->
     ProfiledFun(),
 
     eprof:stop_profiling(),
-    eprof:analyze(total),
-    eprof:stop().
-
-size_estimate_summary(Bookie) ->
-    Loops = 10,
-    ct:log(
-        ?INFO,
-        "Size Estimate Tester (SET) started with Loops ~w",
-        [Loops]
-    ),
-    {{TotalGuessTime, TotalEstimateTime, TotalCountTime}, 
-            {TotalEstimateVariance, TotalGuessVariance}} =
-        lists:foldl(
-            fun(_I, {{GT, ET, CT}, {AET, AGT}}) ->
-                {{GT0, ET0, CT0}, {AE0, AG0}} = size_estimate_tester(Bookie),
-                {{GT + GT0, ET + ET0, CT + CT0}, {AET + AE0, AGT + AG0}}
-            end,
-            {{0, 0, 0}, {0, 0}},
-            lists:seq(1, Loops)
-        ),
-    ct:log(
-        ?INFO,
-        "SET: MeanGuess ~w ms MeanEstimate ~w ms MeanCount ~w ms",
-        [TotalGuessTime div 10000,
-            TotalEstimateTime div 10000,
-            TotalCountTime div 10000]
-    ),
-    ct:log(
-        ?INFO,
-        "Mean variance in Estimate ~w Guess ~w",
-        [TotalEstimateVariance div Loops, TotalGuessVariance div Loops]
-    ),
-    %% Assume that segment-list folds are 10 * as common as all folds
-    {TotalCountTime div 1000, (TotalGuessTime + TotalEstimateTime) div 1000}.
+    eprof:log(atom_to_list(P) ++ ".log"),
+    eprof:analyze(total, [{filter, [{time, 150000}]}]),
+    eprof:stop(),
+    {ok, Analysis} = file:read_file(atom_to_list(P) ++ ".log"),
+    io:format(user, "~n~s~n", [Analysis])
+    .
 
 
 rotate_chunk(Bookie, Bucket, KeyCount, ObjSize) ->
@@ -311,15 +384,6 @@ rotate_chunk(Bookie, Bucket, KeyCount, ObjSize) ->
             end),
     TC div 1000.
 
-load_chunk(Bookie, CountPerList, ObjSize, IndexGenFun, Bucket, Chunk) ->
-    ct:log(?INFO, "Generating and loading ObjList ~w", [Chunk]),
-    ObjList =
-        generate_chunk(CountPerList, ObjSize, IndexGenFun, Bucket, Chunk),
-    {TC, ok} = timer:tc(fun() -> testutil:riakload(Bookie, ObjList) end),
-    garbage_collect(),
-    timer:sleep(2000),
-    TC.
-
 generate_chunk(CountPerList, ObjSize, IndexGenFun, Bucket, Chunk) ->
     testutil:generate_objects(
         CountPerList, 
@@ -329,31 +393,60 @@ generate_chunk(CountPerList, ObjSize, IndexGenFun, Bucket, Chunk) ->
         Bucket
     ).
 
-size_estimate_tester(Bookie) ->
-    %% Data size test - calculate data size, then estimate data size
-    {CountTS, Count} = counter(Bookie, full),
-    {CountTSEstimate, CountEstimate} = counter(Bookie, estimate),
-    {CountTSGuess, CountGuess} = counter(Bookie, guess),
-    {GuessTolerance, EstimateTolerance} =
-        case Count of
-            C when C < 500000 ->
-                {0.20, 0.15};
-            C when C < 1000000 ->
-                {0.12, 0.1};
-            C when C < 2000000 ->
-                {0.1, 0.08};
-            _C ->
-                {0.08, 0.05}
-        end,
+load_chunk(Bookie, CountPerList, ObjSize, IndexGenFun, Bucket, Chunk) ->
+    ct:log(?INFO, "Generating and loading ObjList ~w", [Chunk]),
+    time_load_chunk(
+        Bookie,
+        {Bucket, base64:encode(leveled_rand:rand_bytes(ObjSize)), IndexGenFun(Chunk)},
+        (Chunk - 1) * CountPerList + 1,
+        Chunk * CountPerList,
+        0,
+        0
+    ).
 
-    true =
-        ((CountGuess / Count) > (1.0 - GuessTolerance))
-            and ((CountGuess / Count) < (1.0 + GuessTolerance)),
-    true =
-        ((CountEstimate / Count) > (1.0 - EstimateTolerance))
-            and ((CountEstimate / Count) < (1.0 + EstimateTolerance)),
-    {{CountTSGuess, CountTSEstimate, CountTS},
-        {abs(CountEstimate - Count), abs(CountGuess - Count)}}.
+time_load_chunk(
+        _Bookie, _ObjDetails, KeyNumber, TopKey, TotalTime, PC)
+        when KeyNumber > TopKey ->
+    garbage_collect(),
+    timer:sleep(2000),
+    ct:log(
+        ?INFO,
+        "Count of ~w pauses during chunk load",
+        [PC]
+    ),
+    TotalTime;
+time_load_chunk(
+        Bookie, {Bucket, Value, IndexGen}, KeyNumber, TopKey, TotalTime, PC) ->
+    ThisProcess = self(),
+    spawn(
+        fun() ->
+            {RiakObj, IndexSpecs} =
+                testutil:set_object(
+                    Bucket, testutil:fixed_bin_key(KeyNumber), Value, IndexGen, []),
+            {TC, R} =
+                timer:tc(
+                    testutil, book_riakput, [Bookie, RiakObj, IndexSpecs]
+                ),
+            case R of
+                ok ->
+                    ThisProcess! {TC, 0};
+                pause ->
+                    timer:sleep(40),
+                    ThisProcess ! {TC + 40000, 1}
+            end
+        end
+    ),
+    receive
+        {PutTime, Pause} ->
+            time_load_chunk(
+                Bookie, 
+                {Bucket, Value, IndexGen},
+                KeyNumber + 1,
+                TopKey,
+                TotalTime + PutTime,
+                PC + Pause
+            )
+    end.
 
 counter(Bookie, full) ->
     {async, DataSizeCounter} =
@@ -471,6 +564,42 @@ random_queries(Bookie, Bucket, IDs, IdxCnt, MaxRange, IndexesReturned) ->
     TC div 1000.
 
 
+random_people_queries(Bookie, Bucket, IndexesReturned) ->
+    SeventiesWillowRegex =
+        "[^\\|]*\\|197[0-9]{5}\\|[^\\|]*\\|"
+        "[^\\|]*#Willow[^\\|]*\\|[^\\|]*#LS[^\\|]*",
+        %% born in the 70s with Willow as a given name
+    QueryFun =
+        fun() ->
+            Surname = get_random_surname(),
+            Range =
+                {?PEOPLE_INDEX,
+                    Surname,
+                    <<Surname/binary, 126:8/integer>>
+            },
+            {ok, TermRegex} =
+                re:compile(SeventiesWillowRegex),
+            FoldKeysFun =  fun(_B, _K, Cnt) -> Cnt + 1 end,
+            {async, R} =
+                leveled_bookie:book_indexfold(
+                    Bookie,
+                    {Bucket, <<>>}, 
+                    {FoldKeysFun, 0},
+                    Range,
+                    {true, TermRegex}),
+            R()
+        end,
+    
+    {TC, {QC, EF}} =
+        timer:tc(fun() -> run_queries(QueryFun, 0, 0, IndexesReturned) end),
+    ct:log(
+        ?INFO,
+        "Fetch of ~w index entries by regex in ~w queries took ~w ms",
+        [EF, QC, TC div 1000]
+    ),
+    TC div 1000.
+
+
 run_queries(_QueryFun, QueryCount, EntriesFound, TargetEntries)
         when EntriesFound >= TargetEntries ->
     {QueryCount, EntriesFound};
@@ -486,7 +615,8 @@ profile_fun(
         {Bookie, Bucket, _KeyCount, _ObjSize, IndexCount, IndexesReturned}) ->
     fun() ->
         random_queries(
-            Bookie, Bucket, 10, IndexCount, QuerySize, IndexesReturned div 8)
+            Bookie, Bucket, 10, IndexCount, QuerySize,
+            IndexesReturned div ?MINI_QUERY_DIVISOR)
     end;
 profile_fun(
         {query, QuerySize},
@@ -495,6 +625,13 @@ profile_fun(
         random_queries(
             Bookie, Bucket, 10, IndexCount, QuerySize, IndexesReturned)
     end;
+profile_fun(
+        regex_query,
+        {Bookie, Bucket, _KeyCount, _ObjSize, _IndexCount, IndexesReturned}) ->
+    fun() ->
+        random_people_queries(
+            Bookie, Bucket, IndexesReturned div ?RGEX_QUERY_DIVISOR)
+    end;
 profile_fun(
         {head, HeadFetches},
         {Bookie, Bucket, KeyCount, _ObjSize, _IndexCount, _IndexesReturned}) ->
@@ -524,11 +661,230 @@ profile_fun(
 profile_fun(
         CounterFold,
         {Bookie, _Bucket, _KeyCount, _ObjSize, _IndexCount, _IndexesReturned}) ->
+    Runs =
+        case CounterFold of
+            full ->
+                20;
+            estimate ->
+                40;
+            guess ->
+                100
+        end,
     fun() ->
         lists:foreach(
             fun(_I) ->
                 _ = counter(Bookie, CounterFold)
             end,
-            lists:seq(1, 10)
+            lists:seq(1, Runs)
         )
     end.
+
+random_people_index() ->
+    io_lib:format(
+        "~s|~s|~s|#~s#~s#~s|#~s#~s#~s",
+        [get_random_surname(),
+            get_random_dob(),
+            get_random_dod(),
+            get_random_givenname(), get_random_givenname(), get_random_givenname(),
+            get_random_postcode(), get_random_postcode(), get_random_postcode()
+        ]
+    ).
+
+get_random_surname() ->
+    lists:nth(
+        rand:uniform(100),
+        [<<"Smith">>, <<"Jones">>, <<"Taylor">>, <<"Brown">>, <<"Williams">>,
+            <<"Wilson">>, <<"Johnson">>, <<"Davies">>, <<"Patel">>, <<"Robinson">>,
+            <<"Wright">>, <<"Thompson">>, <<"Evans">>, <<"Walker">>, <<"White">>,
+            <<"Roberts">>, <<"Green">>, <<"Hall">>, <<"Thomas">>, <<"Clarke">>,
+            <<"Jackson">>, <<"Wood">>, <<"Harris">>, <<"Edwards">>, <<"Turner">>,
+            <<"Martin">>, <<"Cooper">>, <<"Hill">>, <<"Ward">>, <<"Hughes">>,
+            <<"Moore">>, <<"Clark">>, <<"King">>, <<"Harrison">>, <<"Lewis">>,
+            <<"Baker">>, <<"Lee">>, <<"Allen">>, <<"Morris">>, <<"Khan">>,
+            <<"Scott">>, <<"Watson">>, <<"Davis">>, <<"Parker">>, <<"James">>,
+            <<"Bennett">>, <<"Young">>, <<"Phillips">>, <<"Richardson">>, <<"Mitchell">>,
+            <<"Bailey">>, <<"Carter">>, <<"Cook">>, <<"Singh">>, <<"Shaw">>,
+            <<"Bell">>, <<"Collins">>, <<"Morgan">>, <<"Kelly">>, <<"Begum">>,
+            <<"Miller">>, <<"Cox">>, <<"Hussain">>, <<"Marshall">>, <<"Simpson">>,
+            <<"Price">>, <<"Anderson">>, <<"Adams">>, <<"Wilkinson">>, <<"Ali">>,
+            <<"Ahmed">>, <<"Foster">>, <<"Ellis">>, <<"Murphy">>, <<"Chapman">>,
+            <<"Mason">>, <<"Gray">>, <<"Richards">>, <<"Webb">>, <<"Griffiths">>,
+            <<"Hunt">>, <<"Palmer">>, <<"Campbell">>, <<"Holmes">>, <<"Mills">>,
+            <<"Rogers">>, <<"Barnes">>, <<"Knight">>, <<"Matthews">>, <<"Barker">>,
+            <<"Powell">>, <<"Stevens">>, <<"Kaur">>, <<"Fisher">>, <<"Butler">>,
+            <<"Dixon">>, <<"Russell">>, <<"Harvey">>, <<"Pearson">>, <<"Graham">>]
+    ).
+
+get_random_givenname() ->
+    lists:nth(
+        rand:uniform(20),
+        [<<"Noah">>, <<"Oliver">>, <<"George">>, <<"Arthur">>, <<"Muhammad">>,
+            <<"Leo">>, <<"Harry">>, <<"Oscar">> , <<"Archie">>, <<"Henry">>,
+            <<"Olivia">>, <<"Amelia">>, <<"Isla">>, <<"Ava">>, <<"Ivy">>,
+            <<"Freya">>, <<"Lily">>, <<"Florence">>, <<"Mia">>, <<"Willow">>
+    ]).
+
+get_random_dob() ->
+    io_lib:format(
+        "~4..0B~2..0B~2..0B",
+        [1900 + rand:uniform(99), rand:uniform(12), rand:uniform(28)]
+    ).
+
+get_random_dod() ->
+    io_lib:format(
+        "~4..0B~2..0B~2..0B",
+        [2000 + rand:uniform(20), rand:uniform(12), rand:uniform(28)]
+    ).
+
+get_random_postcode() ->
+    io_lib:format(
+        "LS~w ~wXX", [rand:uniform(26), rand:uniform(9)]
+    ).
+
+
+memory_tracking(Phase, Timeout) ->
+    spawn(
+        fun() ->
+            memory_tracking(Phase, Timeout, {0, 0, 0}, 0)
+        end
+    ).
+
+memory_tracking(Phase, Timeout, {TAcc, PAcc, BAcc}, Loops) ->
+    receive
+        {stop, Caller} ->
+            {T, P, B} = memory_usage(),
+            TAvg = (T + TAcc) div ((Loops + 1) * 1000000),
+            PAvg = (P + PAcc) div ((Loops + 1) * 1000000),
+            BAvg = (B + BAcc) div ((Loops + 1) * 1000000),
+            print_memory_stats(Phase, TAvg, PAvg, BAvg),
+            Caller ! {TAvg, PAvg, BAvg}
+    after Timeout ->
+        {T, P, B} = memory_usage(),
+        memory_tracking(
+            Phase, Timeout, {TAcc + T, PAcc + P, BAcc + B}, Loops + 1)
+    end.
+
+
+-if(?performance == riak_ctperf).
+print_memory_stats(_Phase, _TAvg, _PAvg, _BAvg) ->
+    ok.
+-else.
+print_memory_stats(Phase, TAvg, PAvg, BAvg) ->
+    io:format(
+        user,
+        "~nFor ~w memory stats: total ~wMB process ~wMB binary ~wMB~n",
+        [Phase, TAvg, PAvg, BAvg]
+    ).
+-endif.
+
+dummy_accountant() ->
+    spawn(fun() -> receive {stop, Caller} -> Caller ! ok end end).
+    
+stop_accounting(Accountant) ->
+    Accountant ! {stop, self()},
+    receive ok -> ok end.
+
+stop_tracker(Tracker) ->
+    garbage_collect(),
+        % Garbage collect the test process, before getting the memory stats
+    Tracker ! {stop, self()},
+    receive MemStats -> MemStats end.
+
+-if(?ACCOUNTING).
+
+-define(ACCT_TYPES, [scheduler, dirty_io_scheduler, dirty_cpu_scheduler, aux]).
+
+accounting(Phase, Timeout, ProfileList) ->
+    case lists:member(Phase, ProfileList) of
+        true ->
+            ZeroCounters =
+                #{
+                    emulator => 0,
+                    aux => 0,
+                    check_io => 0,
+                    gc => 0,
+                    other => 0
+                },
+            InitCounters =
+                lists:map(fun(T) -> {T, ZeroCounters} end, ?ACCT_TYPES),
+            spawn(
+                fun() ->
+                    accounting(Phase, Timeout, maps:from_list(InitCounters), 0)
+                end
+            );
+        false ->
+            dummy_accountant()
+    end.
+
+accounting(Phase, Timeout, Counters, Loops) ->
+    receive
+        {stop, Caller} ->
+            io:format(
+                user,
+                "~n~nStats for Phase ~p after loops ~p:~n",
+                [Phase, Loops]
+            ),
+            lists:foreach(
+                fun(S) ->
+                    scheduler_output(S, maps:get(S, Counters))
+                end,
+                ?ACCT_TYPES
+            ),
+            Caller ! ok
+    after Timeout ->
+        msacc:start(Timeout div 5),
+        UpdCounters =
+            lists:foldl(
+                fun(StatMap, CountersAcc) ->
+                    Type = maps:get(type, StatMap),
+                    case lists:member(Type, ?ACCT_TYPES) of
+                        true ->
+                            TypeAcc =
+                                maps:intersect_with(
+                                    fun(_K, V1, V2) -> V1 + V2 end,
+                                    maps:get(counters, StatMap),
+                                    maps:get(Type, CountersAcc)
+                                ),
+                            maps:update(Type, TypeAcc, CountersAcc);
+                        false ->
+                            CountersAcc
+                    end
+                end,
+                Counters,
+                msacc:stats()
+            ),
+        accounting(Phase, Timeout, UpdCounters, Loops + 1)
+    end.
+
+scheduler_output(Scheduler, CounterMap) ->
+    Total =
+        maps:get(emulator, CounterMap) +
+        maps:get(aux, CounterMap) +
+        maps:get(check_io, CounterMap) +
+        maps:get(gc, CounterMap) +
+        maps:get(other, CounterMap),
+    GC = maps:get(gc, CounterMap),
+    GCperc = case Total > 0 of true -> GC/Total; false -> 0.0 end,
+    io:format(
+        user,
+        "~nFor ~w:~n"
+        "emulator=~w, aux=~w, check_io=~w, gc=~w, other=~w~n"
+        "total ~w~n"
+        "percentage_gc ~.2f %~n",
+        [Scheduler,
+            maps:get(emulator, CounterMap),
+            maps:get(aux, CounterMap),
+            maps:get(check_io, CounterMap),
+            GC,
+            maps:get(other, CounterMap),
+            Total,
+            GCperc
+        ]
+    ).
+
+-else.
+
+accounting(_Phase, _Timeout, _ProfileList) ->
+    dummy_accountant().
+
+-endif.
\ No newline at end of file
diff --git a/test/end_to_end/recovery_SUITE.erl b/test/end_to_end/recovery_SUITE.erl
index 222a7472..8a74ac16 100644
--- a/test/end_to_end/recovery_SUITE.erl
+++ b/test/end_to_end/recovery_SUITE.erl
@@ -483,12 +483,14 @@ rotate_wipe_compact(Strategy1, Strategy2) ->
     {ok, Book3} = leveled_bookie:book_start(BookOptsAlt),
 
     {KSpcL2, _V2} = testutil:put_indexed_objects(Book3, "AltBucket6", 3000),
-    Q2 = fun(RT) -> {index_query,
-                        "AltBucket6",
-                        {fun testutil:foldkeysfun/3, []},
-                        {"idx1_bin", "#", "|"},
-                        {RT, undefined}}
-                    end,
+    Q2 =
+        fun(RT) -> 
+            {index_query,
+                "AltBucket6",
+                {fun testutil:foldkeysfun/3, []},
+                {<<"idx1_bin">>, <<"#">>, <<"|">>},
+                {RT, undefined}}
+        end,
     {async, KFolder2A} = leveled_bookie:book_returnfolder(Book3, Q2(false)),
     KeyList2A = lists:usort(KFolder2A()),
     true = length(KeyList2A) == 3000,
@@ -629,12 +631,14 @@ recovr_strategy(_Config) ->
                         true = VCH == VCG
                         end,
                     lists:nthtail(6400, AllSpcL)),
-    Q = fun(RT) -> {index_query,
-                        "Bucket6",
-                        {fun testutil:foldkeysfun/3, []},
-                        {"idx1_bin", "#", "|"},
-                        {RT, undefined}}
-                    end,
+    Q =
+        fun(RT) ->
+            {index_query,
+                "Bucket6",
+                {fun testutil:foldkeysfun/3, []},
+                {<<"idx1_bin">>, <<"#">>, <<"|">>},
+                {RT, undefined}}
+        end,
     {async, TFolder} = leveled_bookie:book_returnfolder(Book1, Q(true)),
     KeyTermList = TFolder(),
     {async, KFolder} = leveled_bookie:book_returnfolder(Book1, Q(false)),
@@ -660,12 +664,14 @@ recovr_strategy(_Config) ->
     KeyList2 = lists:usort(KFolder2()),
     true = length(KeyList2) == 6400,
 
-    Q2 = fun(RT) -> {index_query,
-                        "AltBucket6",
-                        {fun testutil:foldkeysfun/3, []},
-                        {"idx1_bin", "#", "|"},
-                        {RT, undefined}}
-                    end,
+    Q2 =
+        fun(RT) ->
+            {index_query,
+                "AltBucket6",
+                {fun testutil:foldkeysfun/3, []},
+                {<<"idx1_bin">>, <<"#">>, <<"|">>},
+                {RT, undefined}}
+        end,
     {async, KFolder2A} = leveled_bookie:book_returnfolder(Book2, Q2(false)),
     KeyList2A = lists:usort(KFolder2A()),
     true = length(KeyList2A) == 3000,
diff --git a/test/end_to_end/testutil.erl b/test/end_to_end/testutil.erl
index 2c8dfc7e..f003be3e 100644
--- a/test/end_to_end/testutil.erl
+++ b/test/end_to_end/testutil.erl
@@ -68,7 +68,7 @@
             compact_and_wait/1]).
 
 -define(RETURN_TERMS, {true, undefined}).
--define(SLOWOFFER_DELAY, 10).
+-define(SLOWOFFER_DELAY, 40).
 -define(V1_VERS, 1).
 -define(MAGIC, 53). % riak_kv -> riak_object
 -define(MD_VTAG,     <<"X-Riak-VTag">>).
@@ -691,21 +691,24 @@ load_objects(ChunkSize, GenList, Bookie, TestObject, Generator, SubListL) ->
 
 
 get_randomindexes_generator(Count) ->
-    Generator = fun() ->
-            lists:map(fun(X) ->
-                                {add,
-                                    "idx" ++ integer_to_list(X) ++ "_bin",
-                                    get_randomdate() ++ get_randomname()} end,
-                        lists:seq(1, Count))
+    Generator =
+        fun() ->
+            lists:map(
+                fun(X) ->
+                    {add,
+                        list_to_binary("idx" ++ integer_to_list(X) ++ "_bin"),
+                        list_to_binary(get_randomdate() ++ get_randomname())}
+                end,
+                lists:seq(1, Count))
         end,
     Generator.
 
 name_list() ->
     [{1, "Sophia"}, {2, "Emma"}, {3, "Olivia"}, {4, "Ava"},
-            {5, "Isabella"}, {6, "Mia"}, {7, "Zoe"}, {8, "Lily"},
-            {9, "Emily"}, {10, "Madelyn"}, {11, "Madison"}, {12, "Chloe"},
-            {13, "Charlotte"}, {14, "Aubrey"}, {15, "Avery"},
-            {16, "Abigail"}].
+        {5, "Isabella"}, {6, "Mia"}, {7, "Zoe"}, {8, "Lily"},
+        {9, "Emily"}, {10, "Madelyn"}, {11, "Madison"}, {12, "Chloe"},
+        {13, "Charlotte"}, {14, "Aubrey"}, {15, "Avery"},
+        {16, "Abigail"}].
 
 get_randomname() ->
     NameList = name_list(),
@@ -738,7 +741,7 @@ check_indexed_objects(Book, B, KSpecL, V) ->
             fun({K, Spc}) ->
                 {ok, O} = book_riakget(Book, B, K),
                 V = testutil:get_value(O),
-                {add, "idx1_bin", IdxVal} = lists:keyfind(add, 1, Spc),
+                {add, <<"idx1_bin">>, IdxVal} = lists:keyfind(add, 1, Spc),
                 {IdxVal, K}
             end,
             KSpecL),
@@ -749,7 +752,7 @@ check_indexed_objects(Book, B, KSpecL, V) ->
             {index_query,
                 B,
                 {fun foldkeysfun/3, []},
-                {"idx1_bin", "0", "|"},
+                {<<"idx1_bin">>, <<"0">>, <<"|">>},
                 ?RETURN_TERMS}),
     SW = os:timestamp(),
     {async, Fldr} = R,
@@ -796,11 +799,12 @@ put_altered_indexed_objects(Book, Bucket, KSpecL, RemoveOld2i) ->
     put_altered_indexed_objects(Book, Bucket, KSpecL, RemoveOld2i, V).
 
 put_altered_indexed_objects(Book, Bucket, KSpecL, RemoveOld2i, V) ->
+    SW = os:timestamp(),
     IndexGen = get_randomindexes_generator(1),
-    
+    ThisProcess = self(),
     FindAdditionFun = fun(SpcItem) -> element(1, SpcItem) == add end,
     MapFun = 
-        fun({K, Spc}) ->
+        fun({K, Spc}, Acc) ->
             OldSpecs = lists:filter(FindAdditionFun, Spc),
             {RemoveSpc, AddSpc} =
                 case RemoveOld2i of
@@ -809,26 +813,45 @@ put_altered_indexed_objects(Book, Bucket, KSpecL, RemoveOld2i, V) ->
                     false ->
                         {[], OldSpecs}
                 end,
-            {O, DeltaSpecs} =
-                set_object(Bucket, K, V,
-                            IndexGen, RemoveSpc, AddSpc),
-            % DeltaSpecs should be new indexes added, and any old indexes which
-            % have been removed by this change where RemoveOld2i is true.
-            %
-            % The actual indexes within the object should reflect any history
-            % of indexes i.e. when RemoveOld2i is false.
-            %
-            % The [{Key, SpecL}] returned should accrue additions over loops if
-            % RemoveOld2i is false
-            case book_riakput(Book, O, DeltaSpecs) of
-                ok -> ok;
-                pause -> timer:sleep(?SLOWOFFER_DELAY)
-            end,
+            PutFun =
+                fun() ->
+                    {O, DeltaSpecs} =
+                        set_object(
+                            Bucket, K, V, IndexGen, RemoveSpc, AddSpc),
+                    % DeltaSpecs should be new indexes added, and any old
+                    % indexes which have been removed by this change where
+                    % RemoveOld2i is true.
+                    %
+                    % The actual indexes within the object should reflect any
+                    % history of indexes i.e. when RemoveOld2i is false.
+                    %
+                    % The [{Key, SpecL}] returned should accrue additions over
+                    % loops if RemoveOld2i is false
+                    R =
+                        case book_riakput(Book, O, DeltaSpecs) of
+                            ok ->
+                                ok;
+                            pause ->
+                                timer:sleep(?SLOWOFFER_DELAY),
+                                pause
+                        end,
+                    ThisProcess ! {R, DeltaSpecs}
+                end,
+            spawn(PutFun),
+            AccOut =
+                receive
+                    {ok, NewSpecs} -> Acc;
+                    {pause, NewSpecs} -> Acc + 1
+                end,
             % Note that order in the SpecL is important, as
             % check_indexed_objects, needs to find the latest item added
-            {K, DeltaSpecs ++ AddSpc}
+            {{K, NewSpecs ++ AddSpc}, AccOut}
         end,
-    RplKSpecL = lists:map(MapFun, KSpecL),
+    {RplKSpecL, Pauses} = lists:mapfoldl(MapFun, 0, KSpecL),
+    io:format(
+        "Altering ~w objects took ~w ms with ~w pauses~n",
+        [length(KSpecL), timer:now_diff(os:timestamp(), SW) div 1000, Pauses]
+    ),
     {RplKSpecL, V}.
 
 rotating_object_check(RootPath, B, NumberOfObjects) ->
diff --git a/test/end_to_end/tictac_SUITE.erl b/test/end_to_end/tictac_SUITE.erl
index 16ea215d..37b5e1af 100644
--- a/test/end_to_end/tictac_SUITE.erl
+++ b/test/end_to_end/tictac_SUITE.erl
@@ -378,10 +378,13 @@ index_compare(_Config) ->
     GetTicTacTreeFun =
         fun(X, Bookie) ->
             SW = os:timestamp(),
-            ST = "!",
-            ET = "|",
+            ST = <<"!">>,
+            ET = <<"|">>,
             Q = {tictactree_idx,
-                    {BucketBin, "idx" ++ integer_to_list(X) ++ "_bin", ST, ET},
+                    {BucketBin,
+                        list_to_binary("idx" ++ integer_to_list(X) ++ "_bin"),
+                        ST,
+                        ET},
                     TreeSize,
                     fun(_B, _K) -> accumulate end},
             {async, Folder} = leveled_bookie:book_returnfolder(Bookie, Q),
@@ -442,12 +445,14 @@ index_compare(_Config) ->
     true = DL2_0 == [],
     true = length(DL2_1) > 100,
 
-    IdxSpc = {add, "idx2_bin", "zz999"},
-    {TestObj, TestSpc} = testutil:generate_testobject(BucketBin,
-                                                        term_to_binary("K9.Z"),
-                                                        "Value1",
-                                                        [IdxSpc],
-                                                        [{"MDK1", "MDV1"}]),
+    IdxSpc = {add, <<"idx2_bin">>, <<"zz999">>},
+    {TestObj, TestSpc} =
+        testutil:generate_testobject(
+            BucketBin,
+            term_to_binary("K9.Z"),
+            "Value1",
+            [IdxSpc],
+            [{"MDK1", "MDV1"}]),
     ok = testutil:book_riakput(Book2C, TestObj, TestSpc),
     testutil:check_forobject(Book2C, TestObj),
 
@@ -457,25 +462,30 @@ index_compare(_Config) ->
     TicTacTree3_P3 = GetTicTacTreeFun(2, Book2D),
 
     % Merge the tree across the partitions
-    TicTacTree3_Joined = lists:foldl(fun leveled_tictac:merge_trees/2,
-                                        TicTacTree3_P1,
-                                        [TicTacTree3_P2, TicTacTree3_P3]),
+    TicTacTree3_Joined =
+        lists:foldl(
+            fun leveled_tictac:merge_trees/2,
+            TicTacTree3_P1,
+            [TicTacTree3_P2, TicTacTree3_P3]),
 
     % Find all keys index, and then just the last key
     IdxQ1 = {index_query,
                 BucketBin,
                 {fun testutil:foldkeysfun/3, []},
-                {"idx2_bin", "zz", "zz|"},
+                {<<"idx2_bin">>, <<"zz">>, <<"zz|">>},
                 {true, undefined}},
     {async, IdxFolder1} = leveled_bookie:book_returnfolder(Book2C, IdxQ1),
     true = IdxFolder1() >= 1,
 
-    DL_3to2B = leveled_tictac:find_dirtyleaves(TicTacTree2_P1,
-                                                TicTacTree3_P1),
-    DL_3to2C = leveled_tictac:find_dirtyleaves(TicTacTree2_P2,
-                                                TicTacTree3_P2),
-    DL_3to2D = leveled_tictac:find_dirtyleaves(TicTacTree2_P3,
-                                                TicTacTree3_P3),
+    DL_3to2B =
+        leveled_tictac:find_dirtyleaves(
+            TicTacTree2_P1, TicTacTree3_P1),
+    DL_3to2C =
+        leveled_tictac:find_dirtyleaves(
+            TicTacTree2_P2, TicTacTree3_P2),
+    DL_3to2D =
+        leveled_tictac:find_dirtyleaves(
+            TicTacTree2_P3, TicTacTree3_P3),
     io:format("Individual tree comparison found dirty leaves of ~w ~w ~w~n",
                 [DL_3to2B, DL_3to2C, DL_3to2D]),
 
@@ -509,7 +519,7 @@ index_compare(_Config) ->
     MismatchQ = {index_query,
                     BucketBin,
                     {FoldKeysIndexQFun, []},
-                    {"idx2_bin", "!", "|"},
+                    {<<"idx2_bin">>, <<"!">>, <<"|">>},
                     {true, undefined}},
     {async, MMFldr_2A} = leveled_bookie:book_returnfolder(Book2A, MismatchQ),
     {async, MMFldr_2B} = leveled_bookie:book_returnfolder(Book2B, MismatchQ),
@@ -531,7 +541,7 @@ index_compare(_Config) ->
     io:format("Differences between lists ~w~n", [Diffs]),
 
     % The actual difference is discovered
-    true = lists:member({"zz999", term_to_binary("K9.Z")}, Diffs),
+    true = lists:member({<<"zz999">>, term_to_binary("K9.Z")}, Diffs),
     % Without discovering too many others
     true = length(Diffs) < 20,
 

From da092d01bc7229f3f2d5e2bf9e94fb47e11a258f Mon Sep 17 00:00:00 2001
From: Martin Sumner <martin.sumner@adaptip.co.uk>
Date: Tue, 3 Sep 2024 16:34:41 +0100
Subject: [PATCH 2/2] Make tictac more efficient by making level1 a map (#441)

* Make tictac more efficient by making level1 a map

Pre-change (1M keys, tree size large):

Generating Keys took 2513 milliseconds
Memory footprint [{total,356732576},{processes,334051328},{processes_used,334044488},{system,22681248},{atom,540873},{atom_used,524383},{binary,1015120},{code,9692859},{ets,721496}]
Generating new tree took 1 milliseconds
Loading tree took 27967 milliseconds
Memory footprint [{total,36733040},{processes,8875472},{processes_used,8875048},{system,27857568},{atom,540873},{atom_used,524449},{binary,6236480},{code,9692859},{ets,721496}]
Exporting tree took 434 milliseconds
Importing tree took 100 milliseconds
Memory footprint [{total,155941512},{processes,123734808},{processes_used,123734384},{system,32206704},{atom,540873},{atom_used,524449},{binary,10401144},{code,9692859},{ets,721496}]
Garbage collect
Memory footprint [{total,39660504},{processes,8257520},{processes_used,8256968},{system,31402984},{atom,540873},{atom_used,524449},{binary,9781760},{code,9692859},{ets,721496}]

Post change:

Generating Keys took 2416 milliseconds
Memory footprint [{total,284678120},{processes,258349528},{processes_used,257758568},{system,26328592},{atom,893161},{atom_used,878150},{binary,1013880},{code,11770188},{ets,774224}]
Generating new tree took 0 milliseconds
Loading tree took 2072 milliseconds
Memory footprint [{total,49957448},{processes,17244856},{processes_used,16653896},{system,32712592},{atom,893161},{atom_used,878216},{binary,7397496},{code,11770188},{ets,774224}]
Exporting tree took 448 milliseconds
Importing tree took 108 milliseconds
Memory footprint [{total,46504880},{processes,11197344},{processes_used,10606384},{system,35307536},{atom,893161},{atom_used,878216},{binary,9992112},{code,11770188},{ets,774224}]
Garbage collect
Memory footprint [{total,47394048},{processes,12223608},{processes_used,11632520},{system,35170440},{atom,893161},{atom_used,878216},{binary,9855008},{code,11770188},{ets,774224}]

* Tidy-up

* Add type

* Remove ++ requiring copy of Acc

Rely on mechanism producing a sorted result, not sorting

* Update src/leveled_tictac.erl

Co-authored-by: Thomas Arts <thomas.arts@quviq.com>

* Update following review

---------

Co-authored-by: Thomas Arts <thomas.arts@quviq.com>
---
 src/leveled_penciller.erl |   8 +-
 src/leveled_tictac.erl    | 291 ++++++++++++++++++++++++++++++--------
 2 files changed, 230 insertions(+), 69 deletions(-)

diff --git a/src/leveled_penciller.erl b/src/leveled_penciller.erl
index eeb358fa..4c9e7cbb 100644
--- a/src/leveled_penciller.erl
+++ b/src/leveled_penciller.erl
@@ -159,7 +159,7 @@
 
 -behaviour(gen_server).
 
--include("include/leveled.hrl").
+-include("leveled.hrl").
 
 -export([
         init/1,
@@ -207,23 +207,17 @@
 -export([clean_testdir/1]).
 -endif.
 
--define(MAX_WORK_WAIT, 300).
 -define(MANIFEST_FP, "ledger_manifest").
 -define(FILES_FP, "ledger_files").
--define(CURRENT_FILEX, "crr").
--define(PENDING_FILEX, "pnd").
 -define(SST_FILEX, ".sst").
 -define(ARCHIVE_FILEX, ".bak").
 -define(SUPER_MAX_TABLE_SIZE, 40000).
--define(PROMPT_WAIT_ONL0, 5).
 -define(WORKQUEUE_BACKLOG_TOLERANCE, 4).
 -define(COIN_SIDECOUNT, 4).
 -define(SLOW_FETCH, 500000). % Log a very slow fetch - longer than 500ms
 -define(FOLD_SCANWIDTH, 32).
 -define(ITERATOR_SCANWIDTH, 4).
 -define(ITERATOR_MINSCANWIDTH, 1).
--define(TIMING_SAMPLECOUNTDOWN, 10000).
--define(TIMING_SAMPLESIZE, 100).
 -define(SHUTDOWN_LOOPS, 10).
 -define(SHUTDOWN_PAUSE, 10000).
     % How long to wait for snapshots to be released on shutdown
diff --git a/src/leveled_tictac.erl b/src/leveled_tictac.erl
index e84c46a3..d4b07985 100644
--- a/src/leveled_tictac.erl
+++ b/src/leveled_tictac.erl
@@ -52,8 +52,6 @@
 
 -module(leveled_tictac).
 
--include("include/leveled.hrl").
-
 -export([
             new_tree/1,
             new_tree/2,
@@ -102,10 +100,12 @@
                         size  :: tree_size(),
                         width :: integer(),
                         segment_count :: integer(),
-                        level1 :: binary(),
-                        level2 :: any() % an array - but OTP compatibility
+                        level1 :: level1_map(),
+                        level2 :: array:array()
                         }).
 
+-type level1_map() :: #{non_neg_integer() => binary()}.
+
 -type tictactree() ::
     #tictactree{}.
 -type segment48() ::
@@ -114,6 +114,11 @@
     {binary(), integer(), integer(), integer(), binary()}.
 -type tree_size() :: 
     xxsmall|xsmall|small|medium|large|xlarge.
+-type bin_extract_fun()
+    ::
+    fun((term(), term()) ->
+        {binary(), binary()|{is_hash, non_neg_integer()}}
+    ).
 
 -export_type([tictactree/0, segment48/0, tree_size/0]).
 
@@ -137,7 +142,7 @@ new_tree(TreeID) ->
 new_tree(TreeID, Size) ->
     Width = get_size(Size),
     Lv1Width = Width * ?HASH_SIZE * 8,
-    Lv1Init = <<0:Lv1Width/integer>>,
+    Lv1Init = to_level1_map(<<0:Lv1Width/integer>>),
     Lv2Init = array:new([{size, Width}, {default, ?EMPTY}]),
     #tictactree{treeID = TreeID,
                     size = Size,
@@ -159,9 +164,16 @@ export_tree(Tree) ->
     L2 = 
         lists:foldl(EncodeL2Fun, [], lists:seq(0, Tree#tictactree.width - 1)),
     {struct, 
-        [{<<"level1">>, base64:encode_to_string(Tree#tictactree.level1)}, 
-            {<<"level2">>, {struct, lists:reverse(L2)}}
-            ]}.
+        [{<<"level1">>,
+                base64:encode_to_string(
+                    from_level1_map(Tree#tictactree.level1)
+                )
+            }, 
+            {<<"level2">>,
+                {struct, lists:reverse(L2)}
+            }
+        ]
+    }.
 
 -spec import_tree({struct, list()}) -> tictactree().
 %% @doc
@@ -174,8 +186,9 @@ import_tree(ExportedTree) ->
     Sizes = lists:map(fun(SizeTag) -> {SizeTag, get_size(SizeTag)} end, 
                         ?VALID_SIZES),
     Width = byte_size(L1Bin) div ?HASH_SIZE,
-    {Size, Width} = lists:keyfind(Width, 2, Sizes),
-    Width = get_size(Size),
+    {Size, _Width} = lists:keyfind(Width, 2, Sizes),
+   %% assert that side is indeed the provided width
+    true = get_size(Size) == Width,
     Lv2Init = array:new([{size, Width}]),
     FoldFun = 
         fun({X, EncodedL2SegBin}, L2Array) ->
@@ -183,15 +196,18 @@ import_tree(ExportedTree) ->
             array:set(binary_to_integer(X), L2SegBin, L2Array)
         end,
     Lv2 = lists:foldl(FoldFun, Lv2Init, L2List),
-    #tictactree{treeID = import,
-                    size = Size,
-                    width = Width,
-                    segment_count = Width * ?L2_CHUNKSIZE,
-                    level1 = L1Bin,
-                    level2 = Lv2}.
-
-
--spec add_kv(tictactree(), term(), term(), fun()) -> tictactree().
+    garbage_collect(),
+    #tictactree{
+        treeID = import,
+        size = Size,
+        width = Width,
+        segment_count = Width * ?L2_CHUNKSIZE,
+        level1 = to_level1_map(L1Bin),
+        level2 = Lv2
+    }.
+
+
+-spec add_kv(tictactree(), term(), term(), bin_extract_fun()) -> tictactree().
 %% @doc
 %% Add a Key and value to a tictactree using the BinExtractFun to extract a 
 %% binary from the Key and value from which to generate the hash.  The 
@@ -200,8 +216,9 @@ import_tree(ExportedTree) ->
 add_kv(TicTacTree, Key, Value, BinExtractFun) ->
     add_kv(TicTacTree, Key, Value, BinExtractFun, false).
 
--spec add_kv(tictactree(), term(), term(), fun(), boolean()) 
-                                    -> tictactree()|{tictactree(), integer()}.
+-spec add_kv(
+    tictactree(), term(), term(), bin_extract_fun(), boolean())
+        -> tictactree()|{tictactree(), integer()}.
 %% @doc
 %% add_kv with ability to return segment ID of Key added
 add_kv(TicTacTree, Key, Value, BinExtractFun, ReturnSegment) ->
@@ -215,14 +232,15 @@ add_kv(TicTacTree, Key, Value, BinExtractFun, ReturnSegment) ->
     SegLeaf2Upd = SegLeaf2 bxor SegChangeHash,
     SegLeaf1Upd = SegLeaf1 bxor SegChangeHash,
 
+    UpdatedTree =
+        replace_segment(
+            SegLeaf1Upd, SegLeaf2Upd, L1Extract, L2Extract, TicTacTree
+        ),
     case ReturnSegment of
         true -> 
-            {replace_segment(SegLeaf1Upd, SegLeaf2Upd, 
-                            L1Extract, L2Extract, TicTacTree),
-                Segment};
+            {UpdatedTree, Segment};
         false ->
-            replace_segment(SegLeaf1Upd, SegLeaf2Upd, 
-                            L1Extract, L2Extract, TicTacTree)
+            UpdatedTree
     end.
 
 -spec alter_segment(integer(), integer(), tictactree()) -> tictactree().
@@ -241,8 +259,9 @@ alter_segment(Segment, Hash, Tree) ->
 %% Returns a list of segment IDs which hold differences between the state
 %% represented by the two trees.
 find_dirtyleaves(SrcTree, SnkTree) ->
-    Size = SrcTree#tictactree.size,
-    Size = SnkTree#tictactree.size,
+    SizeSrc = SrcTree#tictactree.size,
+    SizeSnk = SnkTree#tictactree.size,
+    true = SizeSrc == SizeSnk, 
     
     IdxList = find_dirtysegments(fetch_root(SrcTree), fetch_root(SnkTree)),
     SrcLeaves = fetch_leaves(SrcTree, IdxList),
@@ -250,12 +269,19 @@ find_dirtyleaves(SrcTree, SnkTree) ->
     
     FoldFun =
         fun(Idx, Acc) ->
-            {Idx, SrcLeaf} = lists:keyfind(Idx, 1, SrcLeaves),
-            {Idx, SnkLeaf} = lists:keyfind(Idx, 1, SnkLeaves),
+            {_, SrcLeaf} = lists:keyfind(Idx, 1, SrcLeaves),
+            {_, SnkLeaf} = lists:keyfind(Idx, 1, SnkLeaves),
             L2IdxList = segmentcompare(SrcLeaf, SnkLeaf),
-            Acc ++ lists:map(fun(X) -> X + Idx * ?L2_CHUNKSIZE end, L2IdxList)
+            lists:foldl(
+                fun(X, InnerAcc) ->
+                    SegID = X + Idx * ?L2_CHUNKSIZE,
+                    [SegID|InnerAcc]
+                end,
+                Acc,
+                L2IdxList)
         end,
-    lists:sort(lists:foldl(FoldFun, [], IdxList)).
+    %% Output not sorted, as sorted by the design of the construction process
+    lists:foldl(FoldFun, [], IdxList).
 
 -spec find_dirtysegments(binary(), binary()) -> list(integer()).
 %% @doc
@@ -268,7 +294,7 @@ find_dirtysegments(SrcBin, SinkBin) ->
 %% @doc
 %% Return the level1 binary for a tree.
 fetch_root(TicTacTree) ->
-    TicTacTree#tictactree.level1.
+    from_level1_map(TicTacTree#tictactree.level1).
 
 -spec fetch_leaves(tictactree(), list(integer())) -> list().
 %% @doc
@@ -303,15 +329,21 @@ merge_trees(TreeA, TreeB) ->
             NewLevel2 = merge_binaries(L2A, L2B),
             array:set(SQN, NewLevel2, MergeL2)
         end,
-    NewLevel2 = lists:foldl(MergeFun,
-                                MergedTree#tictactree.level2,
-                                lists:seq(0, MergedTree#tictactree.width - 1)),
+    NewLevel2 =
+        lists:foldl(
+            MergeFun,
+            MergedTree#tictactree.level2,
+            lists:seq(0, MergedTree#tictactree.width - 1)
+        ),
     
-    MergedTree#tictactree{level1 = NewLevel1, level2 = NewLevel2}.
-
--spec get_segment(integer(), 
-                    integer()|xxsmall|xsmall|small|medium|large|xlarge) -> 
-                        integer().
+    MergedTree#tictactree{
+        level1 = to_level1_map(NewLevel1),
+        level2 = NewLevel2
+    }.
+
+-spec get_segment(
+    integer(), 
+    integer()|xxsmall|xsmall|small|medium|large|xlarge) -> integer().
 %% @doc
 %% Return the segment ID for a Key.  Can pass the tree size or the actual
 %% segment count derived from the size
@@ -339,8 +371,8 @@ tictac_hash(BinKey, Val) when is_binary(BinKey) ->
         end,
     {HashKeyToSeg, AltHashKey bxor HashVal}.
 
--spec keyto_doublesegment32(binary())
-                                    -> {non_neg_integer(), non_neg_integer()}.
+-spec keyto_doublesegment32(
+    binary()) -> {non_neg_integer(), non_neg_integer()}.
 %% @doc
 %% Used in tictac_hash/2 to provide an alternative hash of the key to bxor with
 %% the value, as well as the segment hash to locate the leaf of the tree to be
@@ -372,8 +404,8 @@ keyto_segment48(BinKey) ->
         _Rest/binary>> =  crypto:hash(md5, BinKey),
     {segment_hash, SegmentID, ExtraHash, AltHash}.
 
--spec generate_segmentfilter_list(list(integer()), tree_size())  
-                                                    -> false|list(integer()). 
+-spec generate_segmentfilter_list(
+    list(integer()), tree_size())  -> false|list(integer()). 
 %% @doc
 %% Cannot accelerate segment listing for trees below certain sizes, so check
 %% the creation of segment filter lists with this function
@@ -398,8 +430,8 @@ generate_segmentfilter_list(SegmentList, Size) ->
             SegmentList
     end.
 
--spec adjust_segmentmatch_list(list(integer()), tree_size(), tree_size())
-                                                            -> list(integer()).
+-spec adjust_segmentmatch_list(
+    list(integer()), tree_size(), tree_size()) -> list(integer()).
 %% @doc
 %% If we have dirty segments discovered by comparing trees of size CompareSize,
 %% and we want to see if it matches a segment for a key which was created for a
@@ -441,8 +473,8 @@ adjust_segmentmatch_list(SegmentList, CompareSize, StoreSize) ->
     end.
 
 
--spec match_segment({integer(), tree_size()}, {integer(), tree_size()}) 
-                                                            -> boolean().
+-spec match_segment(
+    {integer(), tree_size()}, {integer(), tree_size()}) -> boolean().
 %% @doc
 %% Does segment A match segment B - given that segment A was generated using
 %% Tree size A and segment B was generated using Tree Size B
@@ -462,8 +494,29 @@ join_segment(BranchID, LeafID) ->
 %%% Internal functions
 %%%============================================================================
 
--spec extract_segment(integer(), tictactree()) -> 
-                        {integer(), integer(), tree_extract(), tree_extract()}.
+-spec to_level1_map(binary()) -> level1_map().
+to_level1_map(L1Bin) ->
+    to_level1_map_loop(L1Bin, maps:new(), 0).
+
+to_level1_map_loop(<<>>, L1MapAcc, _Idx) ->
+    L1MapAcc;
+to_level1_map_loop(<<Slice:64/binary, Rest/binary>>, L1MapAcc, Idx) ->
+    to_level1_map_loop(Rest, maps:put(Idx, Slice, L1MapAcc), Idx + 1).
+
+
+-spec from_level1_map(level1_map()) -> binary().
+from_level1_map(L1Map) ->
+    lists:foldl(
+        fun(I, Acc) ->
+            <<Acc/binary, (maps:get(I, L1Map))/binary>>
+        end,
+        <<>>,
+        lists:seq(0, maps:size(L1Map) - 1)
+    ).
+
+-spec extract_segment(
+    integer(), tictactree()) -> 
+        {integer(), integer(), tree_extract(), tree_extract()}.
 %% @doc
 %% Extract the Level 1 and Level 2 slices from a tree to prepare an update
 extract_segment(Segment, TicTacTree) ->
@@ -472,9 +525,10 @@ extract_segment(Segment, TicTacTree) ->
     Level1Pos =
         (Segment bsr ?L2_BITSIZE)
             band (TicTacTree#tictactree.width - 1),
+    Level1Slice = Level1Pos div 16,
     
     Level2BytePos = ?HASH_SIZE * Level2Pos,
-    Level1BytePos = ?HASH_SIZE * Level1Pos,
+    Level1BytePos = ?HASH_SIZE * (Level1Pos rem 16),
     
     Level2 = get_level2(TicTacTree, Level1Pos),
     
@@ -484,7 +538,7 @@ extract_segment(Segment, TicTacTree) ->
         PostL2/binary>> = Level2,
     <<PreL1:Level1BytePos/binary,
         SegLeaf1:HashIntLength/integer,
-        PostL1/binary>> = TicTacTree#tictactree.level1,
+        PostL1/binary>> = maps:get(Level1Slice, TicTacTree#tictactree.level1),
     
     {SegLeaf1, 
         SegLeaf2, 
@@ -492,25 +546,26 @@ extract_segment(Segment, TicTacTree) ->
         {PreL2, Level2BytePos, Level2Pos, HashIntLength, PostL2}}.
 
 
--spec replace_segment(integer(), integer(), 
-                        tree_extract(), tree_extract(), 
-                        tictactree()) -> tictactree().
+-spec replace_segment(
+    integer(), integer(), tree_extract(), tree_extract(), tictactree()) ->
+        tictactree().
 %% @doc
 %% Replace a slice of a tree
 replace_segment(L1Hash, L2Hash, L1Extract, L2Extract, TicTacTree) ->
     {PreL1, Level1BytePos, Level1Pos, HashIntLength, PostL1} = L1Extract,
     {PreL2, Level2BytePos, _Level2Pos, HashIntLength, PostL2} = L2Extract,
 
+    Level1Slice = Level1Pos div 16,
+
     Level1Upd = <<PreL1:Level1BytePos/binary,
                     L1Hash:HashIntLength/integer,
                     PostL1/binary>>,
     Level2Upd = <<PreL2:Level2BytePos/binary,
                     L2Hash:HashIntLength/integer,
                     PostL2/binary>>,
-    TicTacTree#tictactree{level1 = Level1Upd,
-                            level2 = array:set(Level1Pos,
-                                                Level2Upd,
-                                                TicTacTree#tictactree.level2)}.
+    TicTacTree#tictactree{
+        level1 = maps:put(Level1Slice, Level1Upd, TicTacTree#tictactree.level1),
+        level2 = array:set(Level1Pos, Level2Upd, TicTacTree#tictactree.level2)}.
 
 get_level2(TicTacTree, L1Pos) ->
     case array:get(L1Pos, TicTacTree#tictactree.level2) of 
@@ -553,7 +608,7 @@ segmentcompare(SrcBin, SnkBin, Acc, Counter) ->
     <<SrcHash:?HASH_SIZE/binary, SrcTail/binary>> = SrcBin,
     <<SnkHash:?HASH_SIZE/binary, SnkTail/binary>> = SnkBin,
     case SrcHash of
-        SnkHash ->
+        H when H == SnkHash  ->
             segmentcompare(SrcTail, SnkTail, Acc, Counter + 1);
         _ ->
             segmentcompare(SrcTail, SnkTail, [Counter|Acc], Counter + 1)
@@ -576,7 +631,7 @@ merge_binaries(BinA, BinB) ->
 -include_lib("eunit/include/eunit.hrl").
 
 checktree(TicTacTree) ->
-    checktree(TicTacTree#tictactree.level1, TicTacTree, 0).
+    checktree(from_level1_map(TicTacTree#tictactree.level1), TicTacTree, 0).
 
 checktree(<<>>, TicTacTree, Counter) ->
     true = TicTacTree#tictactree.width == Counter;
@@ -656,6 +711,7 @@ simple_test_withsize(Size) ->
     DL0 = find_dirtyleaves(Tree1, Tree0),
     ?assertMatch(true, lists:member(GetSegFun(K1), DL0)),
     DL1 = find_dirtyleaves(Tree3, Tree1),
+    ?assertMatch(DL1, lists:sort(DL1)),
     ?assertMatch(true, lists:member(GetSegFun(K2), DL1)),
     ?assertMatch(true, lists:member(GetSegFun(K3), DL1)),
     ?assertMatch(false, lists:member(GetSegFun(K1), DL1)),
@@ -665,6 +721,53 @@ simple_test_withsize(Size) ->
     ImpTree3 = import_tree(ExpTree3),
     ?assertMatch(DL1, find_dirtyleaves(ImpTree3, Tree1)).
 
+dirtyleaves_sorted_test() ->
+    Tree0 = new_tree(test, large),
+    KVL1 =
+        lists:map(
+            fun(I) -> 
+                {{o, to_bucket(I rem 8), to_key(I), null},
+                    {is_hash, erlang:phash2(integer_to_binary(I))}}
+            end,
+            lists:seq(1, 50000)
+        ),
+    KVL2 =
+        lists:map(
+            fun(I) -> 
+                {{o, to_bucket(I rem 8), to_key(I), null},
+                    {is_hash, erlang:phash2(integer_to_binary(I))}}
+            end,
+            lists:seq(100000, 150000)
+        ),
+    Tree1 =
+        lists:foldl(
+            fun({K, V}, Acc) ->
+                add_kv(Acc, K, V, fun(K0, V0) -> {element(3, K0), V0} end)
+            end,
+            Tree0,
+            KVL1
+        ),
+    Tree2 =
+        lists:foldl(
+            fun({K, V}, Acc) ->
+                add_kv(Acc, K, V, fun(K0, V0) -> {element(3, K0), V0} end)
+            end,
+            Tree0,
+            KVL2
+        ),
+    SW0 = os:system_time(millisecond),
+    DL1 = find_dirtyleaves(Tree1, Tree2),
+    DL2 = find_dirtyleaves(Tree2, Tree1),
+    io:format(
+        user,
+        "Finding approx 100K dirty leaves twice in ~w milliseconds~n",
+        [os:system_time(millisecond) - SW0]
+    ),
+    ?assertMatch(DL1, lists:sort(DL1)),
+    ?assertMatch(DL2, lists:sort(DL2)),
+    ?assertMatch(DL1, DL2).
+
+
 merge_bysize_small_test() ->
     merge_test_withsize(small).
 
@@ -870,6 +973,70 @@ find_dirtysegments_withanemptytree_test() ->
     ?assertMatch(ExpectedAnswer, find_dirtysegments(fetch_root(T3), <<>>)).
 
 
+tictac_perf_test_() ->
+    {timeout, 120, fun tictac_perf_tester_multi/0}.
+
+tictac_perf_tester_multi() ->
+    tictac_perf_tester(1000000, large),
+    tictac_perf_tester(40000, small).
+
+tictac_perf_tester(KeyCount, TreeSize) ->
+    io:format(user, "Testing with Tree Size ~w~n", [TreeSize]),
+    io:format(user, "Generating ~w Keys and Hashes~n", [KeyCount]),
+    SW0 = os:system_time(millisecond),
+    KVL =
+        lists:map(
+            fun(I) -> 
+                {{o, to_bucket(I rem 8), to_key(I), null},
+                    {is_hash, erlang:phash2(integer_to_binary(I))}}
+            end,
+            lists:seq(1, KeyCount)
+        ),
+    
+    SW1 = os:system_time(millisecond),
+    io:format(user, "Generating Keys took ~w milliseconds~n", [SW1 - SW0]),
+
+    Tree = new_tree(test, TreeSize),
+    log_memory_footprint(),
+
+    SW2 = os:system_time(millisecond),
+    io:format(user, "Generating new tree took ~w milliseconds~n", [SW2 - SW1]),
+
+    UpdTree =
+        lists:foldl(
+            fun({K, V}, Acc) ->
+                add_kv(Acc, K, V, fun(K0, V0) -> {element(3, K0), V0} end)
+            end,
+            Tree,
+            KVL
+        ),
+    
+    SW3 = os:system_time(millisecond),
+    io:format(user, "Loading tree took ~w milliseconds~n", [SW3 - SW2]),
+    log_memory_footprint(),
+
+    ExportedTree = export_tree(UpdTree),
+
+    SW4 = os:system_time(millisecond),
+    io:format(user, "Exporting tree took ~w milliseconds~n", [SW4 - SW3]),
+
+    ImportedTree = import_tree(ExportedTree),
+
+    SW5 = os:system_time(millisecond),
+    io:format(user, "Importing tree took ~w milliseconds~n", [SW5 - SW4]),
+
+    log_memory_footprint(),
+
+    ?assertMatch([], find_dirtyleaves(UpdTree, ImportedTree)).
+
+to_key(N) ->
+    list_to_binary(io_lib:format("K~8..0B", [N])).
+
+to_bucket(N) ->
+    list_to_binary(io_lib:format("B~8..0B", [N])).
+
+log_memory_footprint() ->
+    io:format(user, "Memory footprint ~0p~n", [erlang:memory()]).
 
 
 -endif.