From 4c95db3223336cc85cb495c90e58d6be3826beed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Plewa?= Date: Tue, 16 Aug 2022 15:39:27 +0200 Subject: [PATCH] Squashed 'src/deps/miniasync/' changes from 706d2635f..f3b809217 f3b809217 Merge pull request #110 from lplewa/master 11fddd440 masync: suppres unused parameters in future.h 1c6ba8d18 masync: 0.2.0-rc2 release 76ba3a10f Merge pull request #108 from pbalcer/fix-use-after-free-membuf c2885af40 common: fix use-after-free in membuf in mt scenarios 1914de3de Merge pull request #106 from DamianDuy/updateAsyncDoc ffedbde35 doc: update async property documentation 12e9fa42c Merge pull request #105 from DamianDuy/addAsyncDoc cb1734572 doc: add async property to documentation 3b44a2cb9 Merge pull request #100 from DamianDuy/addAsyncFlag 9f740e491 masync: add flag indicating that future is async b7216cab1 Merge pull request #90 from DamianDuy/addFlushOp 01ef54aa4 masync: add flush operation for DML data mover c0c8f9ddf Merge pull request #104 from lukaszstolarczuk/update-docs eb41ffcee doc: update code styling to properly appear in manpage e573f7e89 Merge pull request #102 from lplewa/readback c98664d80 Merge pull request #103 from pbalcer/atomic-load-ringbuf 2490b0cee common: use correct atomics in ringbuf 428ecdaf9 masync: add destination_readback flag to persistent memory writes f1203f533 Merge pull request #71 from kswiecicki/masync-example-hashmap bc1d04ef7 masync: add hashmap example 352b4c1aa Merge pull request #92 from lukaszstolarczuk/add-macro-is-init a6e312918 Merge pull request #96 from kilobyte/no-arch-ifdefs 27e8795a8 masync: add macro to check if chain entry was initialized 790bb3522 Merge pull request #99 from lukaszstolarczuk/fix-manpage-header af2e47738 common: replace improper 'RPMA' in man pages with 'MINIASYNC' 01e68810b masync: drop unused #ifdefs that block portability git-subtree-dir: src/deps/miniasync git-subtree-split: f3b809217fc6f4401c227e707708a803725290bc --- ChangeLog | 22 +- 
doc/data_mover_dml_get_vdm.3.md | 5 +- doc/data_mover_threads_new.3.md | 2 +- doc/manuals.txt | 1 + doc/miniasync_future.7.md | 61 +- doc/miniasync_vdm.7.md | 10 +- doc/miniasync_vdm_dml.7.md | 7 +- doc/miniasync_vdm_threads.7.md | 2 +- doc/runtime_wait.3.md | 8 +- doc/vdm_flush.3.md | 59 + doc/vdm_is_supported.3.md | 4 +- doc/vdm_memcpy.3.md | 10 +- doc/vdm_memmove.3.md | 14 +- doc/vdm_memset.3.md | 14 +- examples/CMakeLists.txt | 1 + examples/hashmap/hashmap.c | 1270 +++++++++++++++++ extras/dml/data_mover_dml.c | 46 + src/core/membuf.c | 71 +- src/core/ringbuf.c | 5 +- src/core/util.h | 9 - src/data_mover_sync.c | 5 + src/data_mover_threads.c | 13 + src/include/libminiasync/future.h | 121 +- src/include/libminiasync/vdm.h | 52 + src/runtime.c | 20 + tests/CMakeLists.txt | 26 + .../data_mover_dml_flush.c | 64 + .../test_data_mover_dml_flush.cmake | 22 + .../test_data_mover_dml_memcpy.cmake | 4 +- .../test_data_mover_dml_memmove.cmake | 4 +- .../test_data_mover_dml_memset.cmake | 4 +- tests/ex_hashmap/test_ex_hashmap.cmake | 12 + tests/future/test_future.c | 14 + tests/future/test_future.cmake | 7 +- .../future_properties/future_property_async.c | 278 ++++ .../test_future_properties.cmake | 12 + tests/membuf/membuf_simple.c | 38 +- .../memcpy_threads/test_memcpy_threads.cmake | 2 + tests/memmove_sync/test_memmove_sync.cmake | 2 + .../test_memmove_threads.cmake | 2 + tests/memset_sync/test_memset_sync.cmake | 2 + tests/memset_threads/memset_threads.c | 16 +- .../memset_threads/test_memset_threads.cmake | 2 + tests/runtime_test/runtime_test.c | 98 ++ tests/runtime_test/test_runtime.cmake | 20 + tests/vdm/test_vdm.cmake | 7 +- .../test_vdm_operation_future_poll.cmake | 2 + utils/check-commit.sh | 6 +- utils/check-commits.sh | 6 +- utils/md2man/default.man | 2 +- utils/src2mans.sh | 4 +- 51 files changed, 2330 insertions(+), 158 deletions(-) create mode 100644 doc/vdm_flush.3.md create mode 100644 examples/hashmap/hashmap.c create mode 100644 
tests/data_mover_dml_flush/data_mover_dml_flush.c create mode 100644 tests/data_mover_dml_flush/test_data_mover_dml_flush.cmake create mode 100644 tests/ex_hashmap/test_ex_hashmap.cmake create mode 100644 tests/future_properties/future_property_async.c create mode 100644 tests/future_properties/test_future_properties.cmake create mode 100644 tests/runtime_test/runtime_test.c create mode 100644 tests/runtime_test/test_runtime.cmake diff --git a/ChangeLog b/ChangeLog index 53457f70ec8..3ddd2738c0c 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,13 +1,31 @@ +Thu Aug 11 2022 Ɓukasz Plewa + + * Version 0.2.0-rc1 + + common: use correct atomics in ringbuf + common: fix use-after-free in membuf in mt scenarios + common: replace improper 'RPMA' in man pages with 'MINIASYNC' + doc: add async property to documentation + doc: update async property documentation + doc: update code styling to properly appear in manpage + masync: drop unused #ifdefs that block portability + masync: add hashmap example + masync: add flush operation for DML data mover + masync: add flag indicating that future is async + masync: add destination_readback flag to persistent memory writes + masync: add macro to check if chain entry was initialized + + Fri May 20 2022 Weronika Lewandowska * Version 0.1.0 This is the first official release of the miniasync library. - + The library provides the API for asynchronous memory operations through the use of features and runtime mechanisms and independence in terms of hardware by implementing a virtual data mover. - + This release also contains: - documentation on the implemented functionality - basic examples diff --git a/doc/data_mover_dml_get_vdm.3.md b/doc/data_mover_dml_get_vdm.3.md index ef5c1955afb..946a5f3b87a 100644 --- a/doc/data_mover_dml_get_vdm.3.md +++ b/doc/data_mover_dml_get_vdm.3.md @@ -47,6 +47,7 @@ structure *struct vdm* is needed by every **miniasync**(7) data mover operation. 
* **vdm_memcpy**(3) - memory copy operation * **vdm_memmove**(3) - memory move operation * **vdm_memset**(3) - memory set operation +* **vdm_flush**(3) - cache flush operation # RETURN VALUE # @@ -54,5 +55,5 @@ The **data_mover_dml_get_vdm**() function returns a pointer to *struct vdm* stru # SEE ALSO # -**vdm_memcpy**(3), **vdm_memmove**(3), **vdm_memset**(3), - **miniasync**(7), **miniasync_vdm_dml**(7) and **** +**vdm_flush**(3), **vdm_memcpy**(3), **vdm_memmove**(3), **vdm_memset**(3), +**miniasync**(7), **miniasync_vdm_dml**(7) and **** diff --git a/doc/data_mover_threads_new.3.md b/doc/data_mover_threads_new.3.md index a75adfab1ed..084aef594c6 100644 --- a/doc/data_mover_threads_new.3.md +++ b/doc/data_mover_threads_new.3.md @@ -58,7 +58,7 @@ The **data_mover_threads_default**() function allocates and initialzied a new th data mover structure with default parameters. It spanws *12* threads and creates a ringbuffer with size of *128* bytes. -Currently, thread data mover supports following notifer types: +Currently, thread data mover supports following notifier types: * **FUTURE_NOTIFIER_NONE** diff --git a/doc/manuals.txt b/doc/manuals.txt index 8554e8b8a59..639ed3399d2 100644 --- a/doc/manuals.txt +++ b/doc/manuals.txt @@ -20,3 +20,4 @@ runtime_wait.3 vdm_memcpy.3 vdm_memmove.3 vdm_memset.3 +vdm_flush.3 diff --git a/doc/miniasync_future.7.md b/doc/miniasync_future.7.md index d266a7c1cbb..d7c86d2181a 100644 --- a/doc/miniasync_future.7.md +++ b/doc/miniasync_future.7.md @@ -38,6 +38,9 @@ typedef void (*future_map_fn)(struct future_context *lhs, typedef void (*future_init_fn)(void *future, struct future_context *chain_fut, void *arg); +typedef int (*future_has_property_fn)(void *future, + enum future_property property); + enum future_state { FUTURE_STATE_IDLE, FUTURE_STATE_COMPLETE, @@ -73,6 +76,10 @@ struct future_notifier { uint32_t padding; }; +enum future_property { + FUTURE_PROPERTY_ASYNC, +}; + FUTURE(_name, _data_type, _output_type) FUTURE_INIT(_futurep, 
_taskfn) FUTURE_INIT_COMPLETE(_futurep) @@ -80,6 +87,7 @@ FUTURE_CHAIN_ENTRY(_future_type, _name) FUTURE_CHAIN_ENTRY_LAST(_future_type, _name) FUTURE_CHAIN_ENTRY_INIT(_entry, _fut, _map, _map_arg) FUTURE_CHAIN_ENTRY_LAZY_INIT(_entry, _init, _init_arg, _map, _map_arg) +FUTURE_CHAIN_ENTRY_IS_INITIALIZED(_entry) FUTURE_CHAIN_INIT(_futurep) FUTURE_AS_RUNNABLE(_futurep) FUTURE_OUTPUT(_futurep) @@ -103,6 +111,7 @@ A future contains the following context: * structure for data which is the required state needed to perform the task * structure for output to store the result of the task * the size of the data and output structures (both can be 0) +* a pointer for the property checking function which checks if the future has a custom property A future definition must begin with an instance of the *struct future* type, which contains all common metadata for all futures, followed by the structures for @@ -132,14 +141,24 @@ A future implementation supporting **FUTURE_NOTIFIER_WAKER** type of notifier ca use a **FUTURE_WAKER_WAKE(_wakerp)** macro to signal the caller that some progress can be made and the future should be polled again. -TODO: Mention **FUTURE_NOTIFIER_POLLER** when it becomes supported. + + +Futures can contain custom properties. Information, whether the future contains +the property or not, is returned by the **future_has_property** function. +Concrete implementation of the **future_has_property_fn** function should return the +information, whether given future property applies to the future. Individual futures +can be initialized using `FUTURE_INIT_EXT(_futurep, _taskfn, _propertyfn)` macro to +set the property checking function. Futures initialized regularly have no properties applied. + +Supported properties: +* **FUTURE_PROPERTY_ASYNC** property indicates that the future is asynchronous. For more information about the usage of future API, see *examples* directory in miniasync repository . 
# MACROS # -**FUTURE(_name, _data_type, _output_type)** macro defines a future structure with *\_name* +`FUTURE(_name, _data_type, _output_type)` macro defines a future structure with *\_name* as its name. Besides internal data needed by the future API, the defined structure contains data member of *\_data_type* type and output member of *\_output_type* type. User can provide the data that may later be retrieved in the future task implementation using @@ -148,51 +167,59 @@ retrieved using **future_context_get_output**(3) function. Combined size of data structures can be retrieved using **future_context_get_size**(3) function. When the user has no need for input or output data, *\_data_type* and *\_output_type* can be defined as empty structures. -**FUTURE_INIT(_futurep, _taskfn)** macro assigns task function *\_taskfn* to the future pointed +`FUTURE_INIT(_futurep, _taskfn)` macro assigns task function *\_taskfn* to the future pointed by *\_futurep*. Task function must be of the *future_task_fn* type. -**FUTURE_INIT_COMPLETE(_futurep)** macro instantiates a new already completed future with no assigned +`FUTURE_INIT_EXT(_futurep, _taskfn, _propertyfn)` macro assigns task function *\_taskfn* and property +checking function *\_propertyfn* to the future pointed by *\_futurep*. Task function must be of the +*future_task_fn* type and the property checking function must be of the type *future_has_property_fn*. + +`FUTURE_INIT_COMPLETE(_futurep)` macro instantiates a new already completed future with no assigned task. This is helpful for handling initialization errors during future creation or simply for convenience in instantly ready futures. -**FUTURE_CHAIN_ENTRY(_future_type, _name)** macro defines the future chain entry of the *\_future_type* +`FUTURE_CHAIN_ENTRY(_future_type, _name)` macro defines the future chain entry of the *\_future_type* type named *\_name*. Future chain entries are defined as the members of chained future data structure using this macro. 
Chained future can be composed of multiple future chain entries that will be executed sequentially in the order they were defined. -**FUTURE_CHAIN_ENTRY_LAST(_future_type, _name)** macro can be optionally used to indicate the last +`FUTURE_CHAIN_ENTRY_LAST(_future_type, _name)` macro can be optionally used to indicate the last future in a chain. This lets software to include additional state inside of the *\_data_type* since otherwise the chain task implementation would not be able to differentiate between an entry and other data. -**FUTURE_CHAIN_ENTRY_INIT(_entry, _fut, _map, _map_arg)** macro initializes the future chain +`FUTURE_CHAIN_ENTRY_INIT(_entry, _fut, _map, _map_arg)` macro initializes the future chain entry pointed by *\_entry*. It requires pointer to the future instance *\_fut*, address of the mapping function *\_map* and the pointer to the argument for the mapping function *\_map_arg*. *\_fut* can either be -the instance of the future defined using **FUTURE(_name, _data_type, _output_type)** macro or a virtual +the instance of the future defined using `FUTURE(_name, _data_type, _output_type)` macro or a virtual data mover future. *\_map* function must be of the *future_map_fn* type and is an optional parameter. *map* function should define the mapping behavior of the data and output structures between chained future entry *\_entry* that has finished and the chained future entry that is about to start its execution. Chained future instance must initialize all of its future chain entries using this macro. -**FUTURE_CHAIN_ENTRY_LAZY_INIT(_entry, _init, _init_arg, _map, _map_arg)** macro intializes the +`FUTURE_CHAIN_ENTRY_LAZY_INIT(_entry, _init, _init_arg, _map, _map_arg)` macro initializes the future chain entry pointed by *\_entry* but it does not initialize its underlying future. Instead -it uses function *\_init* and its argument *\init_arg* to instantiate the future right before -its needed. 
This lets software instantiate futures with arguments derived from the results of -previous entries in the chain. The *\map* and *\map_arg* variables behave the +it uses function *\_init* and its argument *\_init_arg* to instantiate the future right before +it's needed. This lets software instantiate futures with arguments derived from the results of +previous entries in the chain. The *\_map* and *\_map_arg* variables behave the same as in **FUTURE_CHAIN_ENTRY_INIT**. -**FUTURE_CHAIN_INIT(_futurep)** macro initializes the chained future at the address *\_futurep*. +`FUTURE_CHAIN_ENTRY_IS_INITIALIZED(_entry)` macro checks if the future chain entry, pointed by *\_entry*, +is already initialized (either lazily or regularly). If it is initialized, its structure can be safely +accessed and used. + +`FUTURE_CHAIN_INIT(_futurep)` macro initializes the chained future at the address *\_futurep*. -**FUTURE_AS_RUNNABLE(_futurep)** macro returns pointer to the runnable form of the future pointed by +`FUTURE_AS_RUNNABLE(_futurep)` macro returns pointer to the runnable form of the future pointed by *\_futurep*. Runnable form of the future is required as an argument in **runtime_wait**(3) and **runtime_wait_multiple**(3) functions. -**FUTURE_OUTPUT(_futurep)** macro returns the output of the future pointed by *\_futurep*. +`FUTURE_OUTPUT(_futurep)` macro returns the output of the future pointed by *\_futurep*. -**FUTURE_BUSY_POLL(_futurep)** repeatedly polls the future pointed by *\_futurep* until +`FUTURE_BUSY_POLL(_futurep)` repeatedly polls the future pointed by *\_futurep* until it completes its execution. This macro does not use optimized polling. -**FUTURE_WAKER_WAKE(_wakerp)** macro performs implementation-defined wake operation. It takes +`FUTURE_WAKER_WAKE(_wakerp)` macro performs implementation-defined wake operation. It takes a pointer to the waker structure of *struct future_waker* type. 
# SEE ALSO # diff --git a/doc/miniasync_vdm.7.md b/doc/miniasync_vdm.7.md index 06053701fc9..61cfb2f75a4 100644 --- a/doc/miniasync_vdm.7.md +++ b/doc/miniasync_vdm.7.md @@ -42,12 +42,14 @@ struct vdm { vdm_operation_delete op_delete; vdm_operation_start op_start; vdm_operation_check op_check; + unsigned capabilities; }; enum vdm_operation_type { VDM_OPERATION_MEMCPY, VDM_OPERATION_MEMMOVE, VDM_OPERATION_MEMSET, + VDM_OPERATION_FLUSH, }; enum vdm_operation_result { @@ -57,8 +59,9 @@ enum vdm_operation_result { }; struct vdm_operation_data { - void *op; + void *data; struct vdm *vdm; + struct vdm_operation operation; }; struct vdm_operation_output { @@ -67,6 +70,8 @@ struct vdm_operation_output { union { struct vdm_operation_output_memcpy memcpy; struct vdm_operation_output_memmove memmove; + struct vdm_operation_output_memset memset; + struct vdm_operation_output_flush flush; } output; }; ``` @@ -95,6 +100,7 @@ Currently, virtual data mover API supports following operation types: * **VDM_OPERATION_MEMCPY** - a memory copy operation * **VDM_OPERATION_MEMMOVE** - a memory move operation * **VDM_OPERATION_MEMSET** - a memory set operation +* **VDM_OPERATION_FLUSH** - a cache flush operation For more information about concrete data mover implementations, see **miniasync_vdm_threads**(7), **miniasync_vdm_synchronous**(7) and **miniasync_vdm_dml**(7). 
@@ -122,7 +128,7 @@ The *result* field can be set to one of the following values: # SEE ALSO # -**vdm_memcpy**(3), **vdm_memmove**(3), **vdm_memset**(3), +**vdm_flush**(3), **vdm_memcpy**(3), **vdm_memmove**(3), **vdm_memset**(3), **miniasync**(7), **miniasync_future**(7), **miniasync_vdm_dml**(7), **miniasync_vdm_synchronous**(7), **miniasync_vdm_threads**(7) and **** diff --git a/doc/miniasync_vdm_dml.7.md b/doc/miniasync_vdm_dml.7.md index 9e720c20961..583bedcf287 100644 --- a/doc/miniasync_vdm_dml.7.md +++ b/doc/miniasync_vdm_dml.7.md @@ -49,19 +49,20 @@ An example of **DML** data mover API usage with flags can be found in **EXAMPLE* When the future is polled for the first time the data mover operation will be executed asynchronously under the control of **DML** library. **DML** data mover does not -block the calling thread +block the calling thread. To create a new **DML** data mover instance, use **data_mover_dml_new**(3) function. **DML** data mover provides the following flags: -* **MINIASYNC_DML_F_MEM_DURABLE** - write to destination is identified as write to durable memory +* **VDM_F_MEM_DURABLE** - write to destination is identified as write to durable memory **DML** data mover supports following operations: * **vdm_memcpy**(3) - memory copy operation * **vdm_memmove**(3) - memory move operation * **vdm_memset**(3) - memory set operation +* **vdm_flush**(3) - cache flush operation **DML** data mover does not support notifier feature. For more information about notifiers, see **miniasync_future**(7). 
@@ -80,6 +81,6 @@ struct vdm_memcpy_future memcpy_fut = vdm_memcpy(dml_mover, dest, src, # SEE ALSO # **data_mover_dml_new**(3), **data_mover_dml_get_vdm**(3), -**vdm_memcpy**(3), **vdm_memmove**(3), **vdm_memset**(3), +**vdm_flush**(3), **vdm_memcpy**(3), **vdm_memmove**(3), **vdm_memset**(3), **miniasync**(7), **miniasync_future**(7), **miniasync_vdm**(7), **** and **** diff --git a/doc/miniasync_vdm_threads.7.md b/doc/miniasync_vdm_threads.7.md index ba6102692cf..ee598dc828c 100644 --- a/doc/miniasync_vdm_threads.7.md +++ b/doc/miniasync_vdm_threads.7.md @@ -55,7 +55,7 @@ Thread data mover supports following operations: * **vdm_memmove**(3) - memory move operation * **vdm_memset**(3) - memory set operation -Thread data mover supports following notifer types: +Thread data mover supports following notifier types: * **FUTURE_NOTIFIER_NONE** - no notifier * **FUTURE_NOTIFIER_WAKER** - waker diff --git a/doc/runtime_wait.3.md b/doc/runtime_wait.3.md index bc0426b568e..d9c8149546e 100644 --- a/doc/runtime_wait.3.md +++ b/doc/runtime_wait.3.md @@ -51,7 +51,13 @@ future with **future_poll**(3) function until completion. The **runtime_wait_multiple**() function works similar to the **runtime_wait**() function, additionally it facilitates polling of multiple futures in an array. **runtime_wait_multiple**() function uniformly polls the first *nfuts* futures in the array pointed by *futs* until all -of them complete execution. +of them complete execution. Runtime execution can be influenced by future properties. +For more information about the future properties, see **miniasync_future**(7). + +Properties, which affect runtime: +* **FUTURE_PROPERTY_ASYNC** property should be applied to asynchronous futures. +During **runtime_wait_multiple**() function, asynchronous futures have a priority over the +synchronous ones and, in general, are being polled first. **miniasync**(7) runtime implementation makes use of the waker notifier feature to optimize future polling. 
For more information about the waker feature, see **miniasync_future**(7). diff --git a/doc/vdm_flush.3.md b/doc/vdm_flush.3.md new file mode 100644 index 00000000000..02d5ad700bf --- /dev/null +++ b/doc/vdm_flush.3.md @@ -0,0 +1,59 @@ +--- +layout: manual +Content-Style: 'text/css' +title: _MP(VDM_FLUSH, 3) +collection: miniasync +header: VDM_FLUSH +secondary_title: miniasync +... + +[comment]: <> (SPDX-License-Identifier: BSD-3-Clause) +[comment]: <> (Copyright 2022, Intel Corporation) + +[comment]: <> (vdm_flush.3 -- man page for miniasync vdm_flush operation) + +[NAME](#name)
+[SYNOPSIS](#synopsis)
+[DESCRIPTION](#description)
+[RETURN VALUE](#return-value)
+[SEE ALSO](#see-also)
+ +# NAME # + +**vdm_flush**() - create a new flush virtual data mover operation structure + +# SYNOPSIS # + +```c +#include + +struct vdm_operation_output_flush { + uint64_t unused; +}; + +FUTURE(vdm_operation_future, + struct vdm_operation_data, struct vdm_operation_output); + +struct vdm_operation_future vdm_flush(struct vdm *vdm, void *dest, size_t n, uint64_t flags); +``` + +For general description of virtual data mover API, see **miniasync_vdm**(7). + +# DESCRIPTION # + +**vdm_flush**() initializes and returns a new flush future based on the virtual data mover +implementation instance *vdm*. The parameters: *dest*, *n* are standard flush parameters. +The *flags* represents data mover specific flags. The **flush** operation is +implemented only for the DML data mover and cannot be used with synchronous and thread data movers. + +Flush future obtained using **vdm_flush**() will attempt to flush the *n* bytes of the processor +caches at the *dest* address when its polled. + +## RETURN VALUE ## + +The **vdm_flush**() function returns an initialized *struct vdm_operation_future* flush future. + +# SEE ALSO # + +**vdm_memcpy**(3), **vdm_memmove**(3), **vdm_memset**(3), **miniasync**(7), **miniasync_vdm**(7), +**miniasync_vdm_dml**(7) and **** diff --git a/doc/vdm_is_supported.3.md b/doc/vdm_is_supported.3.md index 94480ec3d62..1ed1a30a4c2 100644 --- a/doc/vdm_is_supported.3.md +++ b/doc/vdm_is_supported.3.md @@ -37,7 +37,7 @@ static inline int vdm_is_supported(struct vdm *vdm, unsigned capability); Currently vdm defines the following capabilities: - **VDM_F_NO_CACHE_HINT** - If supported, user can pass this flag to the **vdm_memcpy**(), **vdm_memset**(), **vdm_memmove**() functions, to hint vdm to bypass CPU cache, and write the data directly to the memory. If not supported vdm will ignore this flag. 
-- **VDM_F_MEM_DURABLE** -- If supported, user can pass this flag **vdm_memcpy**(), **vdm_memset**(), **vdm_memmove**() functions +- **VDM_F_MEM_DURABLE** -- If supported, user can pass this flag to the **vdm_memcpy**(), **vdm_memset**(), **vdm_memmove**() functions to ensure that the data written has become persistent, when a future completes. ## RETURN VALUE ## @@ -46,5 +46,5 @@ The **vdm_is_supported**() function returns nonzero if the given capability is s # SEE ALSO # -**vdm_memmove**(3), **vdm_memset**(3), **miniasync**(7), **miniasync_vdm**(7), +**vdm_flush**(3), **vdm_memcpy**(3), **vdm_memmove**(3), **vdm_memset**(3), **miniasync**(7), **miniasync_vdm**(7), **miniasync_vdm_dml**(7) and **** diff --git a/doc/vdm_memcpy.3.md b/doc/vdm_memcpy.3.md index 96581f98449..df6299344f4 100644 --- a/doc/vdm_memcpy.3.md +++ b/doc/vdm_memcpy.3.md @@ -27,12 +27,6 @@ secondary_title: miniasync ```c #include -enum vdm_operation_type { - VDM_OPERATION_MEMCPY, - VDM_OPERATION_MEMMOVE, - VDM_OPERATION_MEMSET, -}; - struct vdm_operation_output_memcpy { void *dest; }; @@ -51,7 +45,7 @@ For general description of virtual data mover API, see **miniasync_vdm**(7). **vdm_memcpy**() initializes and returns a new memcpy future based on the virtual data mover implementation instance *vdm*. The parameters: *dest*, *src*, *n* are standard memcpy parameters. The *flags* represents data mover specific flags. For example, **miniasync_vdm_dml**(7) flag -**MINIASYNC_DML_F_MEM_DURABLE** specifies that the write destination is identified as a write +**VDM_F_MEM_DURABLE** specifies that the write destination is identified as a write to durable memory. This flag is meant to be used only with the **miniasync_vdm_dml**(7) data mover implementation, providing it to any other data mover will result in undefined behavior. 
@@ -64,5 +58,5 @@ The **vdm_memcpy**() function returns an initialized *struct vdm_operation_futur # SEE ALSO # -**vdm_memmove**(3), **vdm_memset**(3), **miniasync**(7), **miniasync_vdm**(7), +**vdm_flush**(3), **vdm_memmove**(3), **vdm_memset**(3), **miniasync**(7), **miniasync_vdm**(7), **miniasync_vdm_dml**(7) and **** diff --git a/doc/vdm_memmove.3.md b/doc/vdm_memmove.3.md index 3990be5458d..593ca78e203 100644 --- a/doc/vdm_memmove.3.md +++ b/doc/vdm_memmove.3.md @@ -27,12 +27,6 @@ secondary_title: miniasync ```c #include -enum vdm_operation_type { - VDM_OPERATION_MEMCPY, - VDM_OPERATION_MEMMOVE, - VDM_OPERATION_MEMSET, -}; - struct vdm_operation_output_memmove { void *dest; }; @@ -50,8 +44,10 @@ For general description of virtual data mover API, see **miniasync_vdm**(7). **vdm_memmove**() initializes and returns a new memmove future based on the virtual data mover implementation instance *vdm*. The parameters: *dest*, *src*, *n* are standard memmove parameters. -The *flags* represents data mover specific flags. TODO: provide an example of the flags usage after -implementing memmove future for dml. +The *flags* represents data mover specific flags. For example, **miniasync_vdm_dml**(7) flag +**VDM_F_MEM_DURABLE** specifies that the write destination is identified as a write to +durable memory. This flag is meant to be used only with the **miniasync_vdm_dml**(7) data mover +implementation, providing it to any other data mover will result in undefined behavior. Memmove future obtained using **vdm_memmove**() will attempt to move *n* bytes from memory area *src* to memory area *dest* when its polled. 
@@ -62,5 +58,5 @@ The **vdm_memmove**() function returns an initialized *struct vdm_operation_futu # SEE ALSO # -**vdm_memcpy**(3), **vdm_memset**(3), **miniasync**(7), **miniasync_vdm**(7), +**vdm_flush**(3), **vdm_memcpy**(3), **vdm_memset**(3), **miniasync**(7), **miniasync_vdm**(7), **miniasync_vdm_dml**(7) and **** diff --git a/doc/vdm_memset.3.md b/doc/vdm_memset.3.md index fd13e6997b3..75bb366faa4 100644 --- a/doc/vdm_memset.3.md +++ b/doc/vdm_memset.3.md @@ -27,12 +27,6 @@ secondary_title: miniasync ```c #include -enum vdm_operation_type { - VDM_OPERATION_MEMCPY, - VDM_OPERATION_MEMMOVE, - VDM_OPERATION_MEMSET, -}; - struct vdm_operation_output_memset { void *str; }; @@ -50,8 +44,10 @@ For general description of virtual data mover API, see **miniasync_vdm**(7). **vdm_memset**() initializes and returns a new memset future based on the virtual data mover implementation instance *vdm*. The parameters: *str*, *c*, *n* are standard memset parameters. -The *flags* represents data mover specific flags. TODO: provide an example of the flags usage after -implementing memset future for dml. +The *flags* represents data mover specific flags. For example, **miniasync_vdm_dml**(7) flag +**VDM_F_MEM_DURABLE** specifies that the write destination is identified as a write to +durable memory. This flag is meant to be used only with the **miniasync_vdm_dml**(7) data mover +implementation, providing it to any other data mover will result in undefined behavior. Memset future obtained using **vdm_memset**() will attempt to copy the character *c* to the first, *n* bytes of the memory area *str* when its polled. 
@@ -62,5 +58,5 @@ The **vdm_memset**() function returns an initialized *struct vdm_operation_futur # SEE ALSO # -**vdm_memcpy**(3), **vdm_memmove**(3), **miniasync**(7), **miniasync_vdm**(7), +**vdm_flush**(3), **vdm_memcpy**(3), **vdm_memmove**(3), **miniasync**(7), **miniasync_vdm**(7), **miniasync_vdm_dml**(7) and **** diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index f92060f46ec..c155d074f59 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -43,3 +43,4 @@ endfunction() # add all the examples with a use of the add_example function defined above add_example(basic basic/basic.c) add_example(basic-async basic-async/basic-async.c) +add_example(hashmap hashmap/hashmap.c) diff --git a/examples/hashmap/hashmap.c b/examples/hashmap/hashmap.c new file mode 100644 index 00000000000..c8599e92601 --- /dev/null +++ b/examples/hashmap/hashmap.c @@ -0,0 +1,1270 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2022, Intel Corporation */ + +#include +#include +#include +#include +#include "libminiasync.h" +#include "libminiasync/data_mover_threads.h" + +/* Avoid compatibility errors */ +#ifndef _MSC_VER +#define bool_compare_and_swap __sync_bool_compare_and_swap +#define fetch_and_add __sync_fetch_and_add +#define fetch_and_sub __sync_fetch_and_sub +#else +#include +#include +typedef SSIZE_T ssize_t; + +static __inline int +bool_compare_and_swap_MSVC(volatile LONG *ptr, LONG oldval, LONG newval) +{ + LONG old = InterlockedCompareExchange(ptr, newval, oldval); + return (old == oldval); +} + +#define bool_compare_and_swap(p, o, n)\ + bool_compare_and_swap_MSVC((LONG *)(p), (LONG)(o), (LONG)(n)) +#define fetch_and_add(ptr, value)\ + InterlockedExchangeAdd((LONG *)(ptr), value) +#define fetch_and_sub(ptr, value)\ + InterlockedExchangeAdd((LONG *)(ptr), -value) +#endif + +#define WAIT_FUTURES_MAX 4 + +/* Polls 'nfuts' number of futures until they complete, makes use of runtime */ +#define WAIT_FUTURES(runtimep, futsp, nfuts)\ +do {\ + 
struct future *tmp_futs[WAIT_FUTURES_MAX];\ + for (int i = 0; i < nfuts; i++) {\ + tmp_futs[i] = FUTURE_AS_RUNNABLE(&(futsp[i]));\ + }\ +\ + runtime_wait_multiple(r, tmp_futs, nfuts);\ +} while (0) + +/* + * Represents the state of hashmap entry. Each hashmap entry can be in one of + * the following states: + * + * 'HASHMAP_ENTRY_STATE_UNOCCUPIED' - hashmap entry is not occupied, + * 'HASHMAP_ENTRY_STATE_LOCKED' - hashmap entry is being locked, + * 'HASHMAP_ENTRY_STATE_PRESENT' - hashmap entry contains a key-value pair + */ +enum hashmap_entry_state { + HASHMAP_ENTRY_STATE_UNOCCUPIED, + HASHMAP_ENTRY_STATE_LOCKED, + HASHMAP_ENTRY_STATE_PRESENT, +}; + +/* Hashmap entry definition */ +struct hashmap_entry { + uint64_t key; + struct { + void *addr; + size_t size; + } value; + enum hashmap_entry_state state; +}; + +struct hashmap { + struct hashmap_entry *entries; + size_t capacity; /* Max stored entries */ + size_t length; /* Current number of stored entries */ +}; + +/* Hash function based on Austin Appleby MurmurHash3 64-bit finalizer */ +static size_t +hash_val(uint64_t val) +{ + val ^= val >> 33; + val *= 0xff51afd7ed558ccd; + val ^= val >> 33; + val *= 0xc4ceb9fe1a85ec53; + val ^= val >> 33; + + return val; +} + +/* Checks if entry is empty */ +static inline int +hashmap_entry_empty(struct hashmap_entry *hme) +{ + return (hme->state == HASHMAP_ENTRY_STATE_UNOCCUPIED && hme->key == 0); +} + +/* Checks if entry is deleted */ +static inline int +hashmap_entry_deleted(struct hashmap_entry *hme) +{ + return (hme->state == HASHMAP_ENTRY_STATE_UNOCCUPIED && hme->key != 0); +} + +/* Checks if hashmap entry is unoccupied */ +static inline int +hashmap_entry_unoccupied(struct hashmap_entry *hme) +{ + return hme->state == HASHMAP_ENTRY_STATE_UNOCCUPIED; +} + +/* Creates a new hashmap instance */ +static struct hashmap * +hashmap_new(size_t capacity) +{ + if (capacity <= 0) { + printf("hashmap capacity has to be bigger than 0\n"); + return NULL; + } + + struct hashmap *hm = 
malloc(sizeof(struct hashmap) * capacity); + if (hm == NULL) { + return NULL; + } + + /* Allocate zero'd space */ + hm->entries = calloc(capacity, sizeof(struct hashmap_entry)); + if (hm->entries == NULL) { + goto free_hm; + } + + hm->capacity = capacity; /* Maximum number of hashmap entries */ + hm->length = 0; /* Number of currently occupied hashmap entries */ + + return hm; + +free_hm: + free(hm); + return NULL; +} + +/* Deletes a hashmap instance */ +void +hashmap_delete(struct hashmap *hm) +{ + for (size_t i = 0; i < hm->length; i++) { + if (hm->entries[i].state == HASHMAP_ENTRY_STATE_PRESENT) { + free(hm->entries[i].value.addr); + } + } + + free(hm->entries); + free(hm); +} + +/* Calculates the entry index based on provided key and hashmap capacity */ +static size_t +hashmap_key_index(struct hashmap *hm, uint64_t key) +{ + return (hash_val(key) % (hm->capacity - 1)); +} + +/* + * Looks for the entry fitting provided key based on the 'state': + * + * 'HASHMAP_ENTRY_STATE_PRESENT' - looks for entry that's present + * 'HASHMAP_ENTRY_STATE_UNOCCUPIED' - looks for unoccupied entry + */ +static ssize_t +hashmap_entry_lookup(struct hashmap *hm, uint64_t key, + enum hashmap_entry_state state) +{ + /* Start the search from index calculated based on the provided key */ + ssize_t index = (ssize_t)hashmap_key_index(hm, key); + + for (size_t i = 0; i < hm->capacity; i++, + index = (index + 1) % (ssize_t)hm->capacity) { + struct hashmap_entry *hme = &hm->entries[index]; + + switch (state) { + /* We look for the first unoccupied entry */ + case HASHMAP_ENTRY_STATE_UNOCCUPIED: + if (hashmap_entry_unoccupied(hme)) { + return index; + } + break; + /* We look for the entry matching the provided key */ + case HASHMAP_ENTRY_STATE_PRESENT: + if (hashmap_entry_deleted(hme)) { + /* Deleted entries are skipped */ + continue; + } else if (hashmap_entry_empty(hme)) { + /* Empty entry terminates the search */ + return -1; + } + + if (hme->key == key) { + return index; + } + break; + 
default: + assert(0); /* Should not be reachable */ + } + + } + + return -1; +} + +/* + * Compares the current hashmap entry state with the provided 'old' state and + * changes it to the 'new' state when the comparison was successful. + */ +static unsigned +hashmap_entry_set_state_impl(struct hashmap_entry *hme, + enum hashmap_entry_state old, enum hashmap_entry_state new) +{ + return bool_compare_and_swap(&hme->state, old, new); +} + +/* + * BEGIN of hashmap_entry_set_state_fut + */ +struct hashmap_entry_set_state_data { + struct hashmap_entry *hme; + enum hashmap_entry_state old; + enum hashmap_entry_state new; +}; + +struct hashmap_entry_set_state_output { + unsigned changed; +}; + +FUTURE(hashmap_entry_set_state_fut, struct hashmap_entry_set_state_data, + struct hashmap_entry_set_state_output); + +/* + * hashmap_entry_set_state_fut future task function. Sets the hashmap entry + * state from the old to the new state. + */ +static enum future_state +hashmap_entry_set_state_wrapped(struct future_context *ctx, + struct future_notifier *notifier) +{ + struct hashmap_entry_set_state_data *data = + future_context_get_data(ctx); + struct hashmap_entry_set_state_output *output = + future_context_get_output(ctx); + + output->changed = hashmap_entry_set_state_impl(data->hme, data->old, + data->new); + + return FUTURE_STATE_COMPLETE; +} + +/* Creates and initializes a new hashmap_entry_set_state_fut future */ +static struct hashmap_entry_set_state_fut +hashmap_entry_set_state(struct hashmap_entry *hme, enum hashmap_entry_state old, + enum hashmap_entry_state new) +{ + struct hashmap_entry_set_state_fut future; + + /* Set input values */ + future.data.hme = hme; + future.data.new = new; + future.data.old = old; + /* Set default output value */ + future.output.changed = 0; + + FUTURE_INIT(&future, hashmap_entry_set_state_wrapped); + + return future; +} +/* + * END of hashmap_entry_set_state_fut + */ + +/* + * BEGIN of hashmap_lookup_fut future + */ +struct 
hashmap_lookup_data { + struct hashmap *hm; + uint64_t key; + enum hashmap_entry_state state; +}; + +struct hashmap_lookup_output { + struct hashmap_entry *hme; +}; + +FUTURE(hashmap_lookup_fut, struct hashmap_lookup_data, + struct hashmap_lookup_output); + +/* + * hashmap_lookup_fut future task function. Looks for appropriate hashmap entry + * matching provided key. + */ +static enum future_state +hashmap_lookup_impl(struct future_context *ctx, + struct future_notifier *notifier) +{ + struct hashmap_lookup_data *data = + future_context_get_data(ctx); + struct hashmap_lookup_output *output = + future_context_get_output(ctx); + struct hashmap *hm = data->hm; + uint64_t key = data->key; + enum hashmap_entry_state state = data->state; + + struct hashmap_entry *hme = NULL; + if (key == 0) { + printf("invalid key %" PRIu64 "\n", key); + goto set_output; + } else if (state == HASHMAP_ENTRY_STATE_UNOCCUPIED && + hm->capacity == hm->length) { + printf("no space left for key %" PRIu64 "\n", key); + goto set_output; + } else if (state == HASHMAP_ENTRY_STATE_UNOCCUPIED && + hashmap_entry_lookup(hm, key, + HASHMAP_ENTRY_STATE_PRESENT) != -1) { + printf("key %" PRIu64 " already exists\n", key); + goto set_output; + } + + ssize_t index = hashmap_entry_lookup(hm, key, state); + if (index == -1) { + switch (state) { + case HASHMAP_ENTRY_STATE_PRESENT: + /* Entry with given key is not present in the hashmap */ + goto set_output; + case HASHMAP_ENTRY_STATE_UNOCCUPIED: + /* + * An unoccupied entry wasn't found despite hashmap not + * being full. Re-run the lookup future. 
+ */ + return FUTURE_STATE_RUNNING; + default: + assert(0); /* Should not be reachable */ + } + } + + hme = &hm->entries[index]; + +set_output: + output->hme = hme; + return FUTURE_STATE_COMPLETE; +} + +/* Creates and initializes a new hashmap_lookup_fut future */ +static struct hashmap_lookup_fut +hashmap_lookup(struct hashmap *hm, uint64_t key, enum hashmap_entry_state state) +{ + struct hashmap_lookup_fut future; + /* Set input values */ + future.data.hm = hm; + future.data.key = key; + future.data.state = state; + /* Set default output value */ + future.output.hme = NULL; + + FUTURE_INIT(&future, hashmap_lookup_impl); + + return future; +} +/* + * END of hashmap_lookup_fut future + */ + +/* + * BEGIN of chain_entries_rerun_fut future + */ +struct chain_entries_rerun_data { + struct future_chain_entry **entries; + size_t n_entries; +}; + +struct chain_entries_rerun_output { + uint64_t unused; /* Avoid compiled empty struct error */ +}; + +FUTURE(chain_entries_rerun_fut, struct chain_entries_rerun_data, + struct chain_entries_rerun_output); + +/* + * chain_entries_rerun_fut future task function. Marks provided chained future + * entries as not 'processed'. This future should only be used as an entry + * of chained future. + */ +static enum future_state +chain_entries_rerun_impl(struct future_context *ctx, + struct future_notifier *notifier) +{ + struct chain_entries_rerun_data *data = future_context_get_data(ctx); + + struct future_chain_entry **entries = data->entries; + size_t n_entries = data->n_entries; + + unsigned rerun = 0; + for (size_t i = 0; i < n_entries; i++) { + struct future_chain_entry *entry = entries[i]; + if (entry) { + entry->flags &= ~FUTURE_CHAIN_FLAG_ENTRY_PROCESSED; + entry->future.context.state = FUTURE_STATE_RUNNING; + rerun = 1; + } + } + + if (rerun) { + /* + * When we re-run this future all not 'processed' future + * entries in a chained future will be re-run. 
+ */ + return FUTURE_STATE_RUNNING; + } + + return FUTURE_STATE_COMPLETE; +} + +/* Creates and initializes a new chain_entries_rerun_fut future */ +static struct chain_entries_rerun_fut +chain_entries_rerun(struct future_chain_entry **entries, size_t n_entries) +{ + struct chain_entries_rerun_fut future; + /* Set input values */ + future.data.entries = entries; + future.data.n_entries = n_entries; + + FUTURE_INIT(&future, chain_entries_rerun_impl); + + return future; +} +/* + * END of chain_entries_rerun_fut future + */ + +/* + * BEGIN of hashmap_lookup_lock_entry_fut future + */ +struct hashmap_lookup_lock_entry_data { + FUTURE_CHAIN_ENTRY(struct hashmap_lookup_fut, lookup); + FUTURE_CHAIN_ENTRY(struct hashmap_entry_set_state_fut, set_state); + FUTURE_CHAIN_ENTRY_LAST(struct chain_entries_rerun_fut, entries_rerun); + struct future_chain_entry *entriesp[2]; +}; + +struct hashmap_lookup_lock_entry_output { + struct hashmap_entry *hme; +}; + +FUTURE(hashmap_lookup_lock_entry_fut, struct hashmap_lookup_lock_entry_data, + struct hashmap_lookup_lock_entry_output); + +/* + * Maps 'lookup' future entry output data to the 'set_state' future entry data. + */ +static void +lookup_to_set_state_map(struct future_context *lookup_ctx, + struct future_context *set_state_ctx, void *arg) +{ + struct hashmap_lookup_output *lookup_output = + future_context_get_output(lookup_ctx); + struct hashmap_entry_set_state_data *set_state_data = + future_context_get_data(set_state_ctx); + struct hashmap_entry *hme = lookup_output->hme; + + if (hme == NULL) { + /* + * Entry lookup failed, no need to lock the entry in + * 'locked' state. + */ + set_state_ctx->state = FUTURE_STATE_COMPLETE; + } + + set_state_data->hme = hme; +} + +/* + * Lazy initialization function. Initializes the 'chain_entries_rerun' future + * based on the 'lookup' and 'set_state' future entries results. 
+ */ +static void +chain_entry_rerun_init(void *future, + struct future_context *lookup_lock_entry_ctx, void *arg) +{ + struct hashmap_lookup_lock_entry_data *data = + future_context_get_data(lookup_lock_entry_ctx); + struct hashmap_lookup_lock_entry_output *output = + future_context_get_output(lookup_lock_entry_ctx); + + struct chain_entries_rerun_fut fut; + + struct hashmap_entry *hme = data->lookup.fut.output.hme; + unsigned locked = data->set_state.fut.output.changed; + if (hme != NULL && !locked) { + /* + * 'lookup' found a hashmap entry, but 'set_state' failed to + * lock it. We should re-run those two future entries to try + * and find and lock a hashmap entry again. + */ + data->entriesp[0] = (struct future_chain_entry *)&data->lookup; + data->entriesp[1] = + (struct future_chain_entry *)&data->set_state; + fut = chain_entries_rerun(data->entriesp, 2); + } else { + /* + * Either 'lookup' and 'set_state' successfuly found and locked + * a hashmap entry or the 'lookup' failed. 'chain_entry_rerun' + * shouldn't be executed. + */ + FUTURE_INIT_COMPLETE(&fut); + + /* + * At this point we can also set 'hashmap_lookup_lock_entry_fut' + * future output, since it practically finished its job. 
+ */ + output->hme = data->lookup.fut.output.hme; + } + + memcpy(future, &fut, sizeof(fut)); +} + +/* Creates and initializes a new hashmap_lookup_lock_entry_fut future */ +static struct hashmap_lookup_lock_entry_fut +hashmap_lookup_lock_entry(struct hashmap *hm, uint64_t key, + enum hashmap_entry_state state) +{ + struct hashmap_lookup_lock_entry_fut chain; + /* Initialize chained future entries */ + FUTURE_CHAIN_ENTRY_INIT(&chain.data.lookup, + hashmap_lookup(hm, key, state), + lookup_to_set_state_map, NULL); + FUTURE_CHAIN_ENTRY_INIT(&chain.data.set_state, + hashmap_entry_set_state(NULL, state, + HASHMAP_ENTRY_STATE_LOCKED), + NULL, NULL); + FUTURE_CHAIN_ENTRY_LAZY_INIT(&chain.data.entries_rerun, + chain_entry_rerun_init, NULL, NULL, NULL); + /* Set default chained future output value */ + chain.output.hme = NULL; + + FUTURE_CHAIN_INIT(&chain); + + return chain; +} +/* + * END of hashmap_lookup_lock_entry_fut future + */ + +/* + * BEGIN of hashmap_get_fut future + */ +struct hashmap_get_copy_data { + FUTURE_CHAIN_ENTRY(struct hashmap_lookup_lock_entry_fut, + lookup_lock_entry); + FUTURE_CHAIN_ENTRY(struct vdm_operation_future, memcpy_value); + FUTURE_CHAIN_ENTRY(struct hashmap_entry_set_state_fut, set_state); +}; + +struct hashmap_get_copy_output { + void *value; + size_t size; + size_t copy_size; +}; + +FUTURE(hashmap_get_copy_fut, struct hashmap_get_copy_data, + struct hashmap_get_copy_output); + +/* + * Maps 'lookup_lock' future entry data to the 'memcpy_value' future entry + * data. 
+ */ +static void +lookup_lock_entry_to_memcpy_value_map( + struct future_context *lookup_lock_entry_ctx, + struct future_context *memcpy_value_ctx, void *arg) +{ + struct hashmap_lookup_lock_entry_output *lookup_lock_entry_output = + future_context_get_output(lookup_lock_entry_ctx); + struct vdm_operation_data *memcpy_value_data = + future_context_get_data(memcpy_value_ctx); + + struct vdm_operation *memcpy_op = &memcpy_value_data->operation; + + struct hashmap_entry *hme = lookup_lock_entry_output->hme; + if (hme == NULL) { + /* + * 'lookup_lock_entry' failed to find or lock a hashmap entry. + * 'memcpy_value' shouldn't be executed. + */ + memcpy_value_ctx->state = FUTURE_STATE_COMPLETE; + return; + } + + size_t entry_size = hme->value.size; + size_t buf_size = (size_t)arg; + + /* We will not copy more data than buffer can fit */ + if (entry_size > buf_size) { + memcpy_op->data.memcpy.n = buf_size; + } else { + memcpy_op->data.memcpy.n = entry_size; + } + + memcpy_op->data.memcpy.src = hme->value.addr; +} + +/* + * Maps 'set_state' future entry data to the 'hashmap_get_copy_fut' chained + * future output data. + */ +static void +set_state_to_output_map_for_get(struct future_context *set_state_ctx, + struct future_context *get_copy_ctx, void *arg) +{ + struct hashmap_entry_set_state_data *set_state_data = + future_context_get_data(set_state_ctx); + struct hashmap_entry_set_state_output *set_state_output = + future_context_get_output(set_state_ctx); + struct hashmap_get_copy_data *get_copy_data = + future_context_get_data(get_copy_ctx); + struct hashmap_get_copy_output *get_copy_output = + future_context_get_output(get_copy_ctx); + + struct vdm_operation_data_memcpy memcpy_value_data = + get_copy_data->memcpy_value.fut.data.operation.data.memcpy; + + if (set_state_output->changed) { + /* + * 'set_state' changed the hashmap entry state from 'locked'. 
+ * We can assume that 'lookup_lock_entry' and 'memcpy_value' + * were successful, so we can set the 'hashmap_get_copy_fut' + * output safely. + */ + get_copy_output->value = memcpy_value_data.dest; + get_copy_output->copy_size = memcpy_value_data.n; + get_copy_output->size = set_state_data->hme->value.size; + } +} + +/* + * Lazy initialization function. Initializes the 'set_state' future based on + * the 'lookup_lock_entry' future entry result. + */ +static void +set_state_init_for_get(void *future, + struct future_context *hashmap_get_copy_ctx, void *arg) +{ + struct hashmap_get_copy_data *data = + future_context_get_data(hashmap_get_copy_ctx); + + struct hashmap_entry_set_state_fut fut = {.output.changed = 0}; + struct hashmap_entry *hme = data->lookup_lock_entry.fut.output.hme; + if (hme == NULL) { + /* + * 'lookup_lock_entry' future entry failed to find a hashmap + * entry. 'set_state' future entry shouldn't be executed. + */ + FUTURE_INIT_COMPLETE(&fut); + } else { + /* + * 'lookup_lock_entry' was successful. + * Set hashmap entry state to 'present'. 
+ */ + fut = hashmap_entry_set_state(hme, + HASHMAP_ENTRY_STATE_LOCKED, + HASHMAP_ENTRY_STATE_PRESENT); + } + + memcpy(future, &fut, sizeof(fut)); +} + +/* Creates and initializes a new hashmap_get_copy_fut future */ +static struct hashmap_get_copy_fut +hashmap_get_copy(struct vdm *vdm, struct hashmap *hm, uint64_t key, void *buf, + size_t buf_size) +{ + struct hashmap_get_copy_fut chain; + /* Initialize chained future entries */ + FUTURE_CHAIN_ENTRY_INIT(&chain.data.lookup_lock_entry, + hashmap_lookup_lock_entry(hm, key, + HASHMAP_ENTRY_STATE_PRESENT), + lookup_lock_entry_to_memcpy_value_map, + (void *)buf_size); + FUTURE_CHAIN_ENTRY_INIT(&chain.data.memcpy_value, + vdm_memcpy(vdm, buf, NULL, 0, 0), NULL, NULL); + FUTURE_CHAIN_ENTRY_LAZY_INIT(&chain.data.set_state, + set_state_init_for_get, NULL, + set_state_to_output_map_for_get, NULL); + /* Set default output values */ + chain.output.size = 0; + chain.output.copy_size = 0; + chain.output.value = NULL; + + FUTURE_CHAIN_INIT(&chain); + + return chain; +} +/* + * END of hashmap_get_fut future + */ + +/* + * BEGIN of hashmap_entry_init_fut future + */ +struct hashmap_entry_init_data { + struct hashmap *hm; + struct hashmap_entry *hme; + uint64_t key; + size_t size; +}; + +struct hashmap_entry_init_output { + struct hashmap_entry *hme; +}; + +FUTURE(hashmap_entry_init_fut, struct hashmap_entry_init_data, + struct hashmap_entry_init_output); + +/* + * hashmap_entry_init_fut future task function. Initializes a hashmap entry with + * an appropriate data and allocates memory for the hashmap entry value. 
+ */ +static enum future_state +hashmap_entry_init_impl(struct future_context *ctx, + struct future_notifier *notifier) +{ + struct hashmap_entry_init_data *data = future_context_get_data(ctx); + struct hashmap_entry_init_output *output = + future_context_get_output(ctx); + struct hashmap *hm = data->hm; + struct hashmap_entry *hme = data->hme; + output->hme = hme; + + /* Allocate the memory for a new key-value pair */ + void *addr = malloc(data->size); + if (addr == NULL) { + return FUTURE_STATE_COMPLETE; + } + + hme->key = data->key; + hme->value.addr = addr; + hme->value.size = data->size; + + /* Increase the hashmap length */ + size_t old_val = fetch_and_add(&hm->length, 1); + assert(old_val != hm->length); + + /* Avoid unused variable warning */ + (void) old_val; + + return FUTURE_STATE_COMPLETE; +} + +/* Creates and initializes a new hashmap_entry_init_fut future */ +static struct hashmap_entry_init_fut +hashmap_entry_init(struct hashmap *hm, struct hashmap_entry *hme, uint64_t key, + size_t size) +{ + struct hashmap_entry_init_fut future; + /* Set input values */ + future.data.hm = hm; + future.data.hme = hme; + future.data.key = key; + future.data.size = size; + /* Set default output value */ + future.output.hme = NULL; + + FUTURE_INIT(&future, hashmap_entry_init_impl); + + return future; +} +/* + * END of hashmap_entry_init_fut future + */ + +/* + * BEGIN of hashmap_put_fut future + */ +struct hashmap_put_data { + FUTURE_CHAIN_ENTRY(struct hashmap_lookup_lock_entry_fut, + lookup_lock_entry); + FUTURE_CHAIN_ENTRY(struct hashmap_entry_init_fut, init_entry); + FUTURE_CHAIN_ENTRY(struct vdm_operation_future, memcpy_value); + FUTURE_CHAIN_ENTRY(struct hashmap_entry_set_state_fut, set_state); +}; + +struct hashmap_put_output { + char *value; +}; + +FUTURE(hashmap_put_fut, struct hashmap_put_data, struct hashmap_put_output); + +/* Maps the 'lock_entry' future data to the 'entry_init' future data */ +static void +lookup_lock_entry_to_entry_init_map( + struct 
future_context *lookup_lock_entry_ctx, + struct future_context *init_entry_ctx, void *arg) +{ + struct hashmap_lookup_lock_entry_output *lookup_lock_output = + future_context_get_output(lookup_lock_entry_ctx); + struct hashmap_entry_init_data *entry_init_data = + future_context_get_data(init_entry_ctx); + struct hashmap_entry *hme = lookup_lock_output->hme; + + if (hme == NULL) { + /* + * 'lookup_lock' future failed to find a hashmap entry, + * 'entry_init' future shouldn't be executed. + */ + init_entry_ctx->state = FUTURE_STATE_COMPLETE; + } + + entry_init_data->hme = hme; +} + +/* + * Maps the 'entry_init' future entry output data to the 'memcpy_value' future + * entry data. + */ +static void +entry_init_to_memcpy_value_map(struct future_context *init_entry_ctx, + struct future_context *memcpy_value_ctx, void *arg) +{ + struct hashmap_entry_init_output *entry_init_output = + future_context_get_output(init_entry_ctx); + struct vdm_operation_data *memcpy_value_data = + future_context_get_data(memcpy_value_ctx); + + struct hashmap_entry *hme = entry_init_output->hme; + if (hme == NULL || hme->value.addr == NULL) { + /* + * Either 'lookup_lock' future entry failed to find a hashmap + * entry or 'entry_init' future entry failed to initialize it. + * 'memcpy_value' future entry shouldn't be executed. + */ + memcpy_value_ctx->state = FUTURE_STATE_COMPLETE; + return; + } + + struct vdm_operation *memcpy_op = &memcpy_value_data->operation; + memcpy_op->data.memcpy.dest = hme->value.addr; +} + +/* + * Maps the 'set_state' future entry data to the 'hashmap_put_fut' chained + * future output data. 
+ */ +static void +set_entry_state_to_output_map_for_put( + struct future_context *entry_set_state_ctx, + struct future_context *put_ctx, void *arg) +{ + struct hashmap_entry_set_state_data *entry_set_state_data = + future_context_get_data(entry_set_state_ctx); + struct hashmap_put_output *put_output = + future_context_get_output(put_ctx); + struct hashmap_entry *hme = entry_set_state_data->hme; + + /* + * Map the address of the hashmap locked by the 'lookup_lock_entry', + * 'init_entry', 'memcpy_value', and 'set_state' future entries to + * the 'hashmap_put' chained future output. + */ + put_output->value = (hme) ? hme->value.addr : NULL; +} + +/* + * Lazy initialization function. Initializes the 'set_state' future based on + * the 'init_entry' future entry result. + */ +static void +set_state_init_for_put(void *future, + struct future_context *hashmap_put_ctx, void *arg) +{ + struct hashmap_put_data *data = + future_context_get_data(hashmap_put_ctx); + struct hashmap_entry_set_state_fut fut = {.output.changed = 0}; + struct hashmap_entry *hme = data->init_entry.fut.output.hme; + + /* + * 'lookup_lock_entry' future entry failed to find and lock a hashmap + * entry. 'set_state' future entry shouldn't be executed. + */ + if (hme == NULL) { + FUTURE_INIT_COMPLETE(&fut); + fut.data.hme = NULL; + goto copy_fut; + } + + /* + * Hashmap entry state should be set to 'unoccupied' when 'init_entry' + * future entry failed or 'present' when it was successful. + */ + enum hashmap_entry_state state = (hme->value.addr != NULL) ? 
+ HASHMAP_ENTRY_STATE_PRESENT : HASHMAP_ENTRY_STATE_UNOCCUPIED; + + fut = hashmap_entry_set_state(hme, HASHMAP_ENTRY_STATE_LOCKED, + state); + +copy_fut: + memcpy(future, &fut, sizeof(fut)); +} + +/* Creates and initializes a new hashmap_put_fut future */ +static struct hashmap_put_fut +hashmap_put(struct vdm *vdm, struct hashmap *hm, uint64_t key, void *value, + size_t size) +{ + struct hashmap_put_fut chain; + /* Initialize chained future entries */ + FUTURE_CHAIN_ENTRY_INIT(&chain.data.lookup_lock_entry, + hashmap_lookup_lock_entry(hm, key, + HASHMAP_ENTRY_STATE_UNOCCUPIED), + lookup_lock_entry_to_entry_init_map, NULL); + FUTURE_CHAIN_ENTRY_INIT(&chain.data.init_entry, + hashmap_entry_init(hm, NULL, key, size), + entry_init_to_memcpy_value_map, NULL); + FUTURE_CHAIN_ENTRY_INIT(&chain.data.memcpy_value, + vdm_memcpy(vdm, NULL, value, size, 0), NULL, NULL); + FUTURE_CHAIN_ENTRY_LAZY_INIT(&chain.data.set_state, + set_state_init_for_put, NULL, + set_entry_state_to_output_map_for_put, NULL); + /* Set default output value */ + chain.output.value = NULL; + + FUTURE_CHAIN_INIT(&chain); + + return chain; +} +/* + * END of hashmap_put_fut future + */ + +/* + * BEGIN of hashmap_entry_fini_fut future + */ +struct hashmap_entry_fini_data { + struct hashmap *hm; + struct hashmap_entry *hme; +}; + +struct hashmap_entry_fini_output { + struct hashmap_entry *hme; +}; + +FUTURE(hashmap_entry_fini_fut, struct hashmap_entry_fini_data, + struct hashmap_entry_fini_output); + +/* + * hashmap_entry_fini_fut future task function. Finalizes a hashmap entry. 
+ */ +static enum future_state +hashmap_entry_fini_impl(struct future_context *ctx, + struct future_notifier *notifier) +{ + struct hashmap_entry_fini_data *data = future_context_get_data(ctx); + struct hashmap_entry_fini_output *output = + future_context_get_output(ctx); + struct hashmap *hm = data->hm; + struct hashmap_entry *hme = data->hme; + output->hme = hme; + + free(hme->value.addr); + + size_t old_val = fetch_and_sub(&hm->length, 1); + assert(old_val != hm->length); + + /* Avoid unused variable warning */ + (void) old_val; + + return FUTURE_STATE_COMPLETE; +} + +/* Creates and initializes a new hashmap_entry_fini_fut future */ +static struct hashmap_entry_fini_fut +hashmap_entry_fini(struct hashmap *hm, struct hashmap_entry *hme) +{ + struct hashmap_entry_fini_fut future; + /* Set input values */ + future.data.hm = hm; + future.data.hme = hme; + /* Set default output value */ + future.output.hme = NULL; + + FUTURE_INIT(&future, hashmap_entry_fini_impl); + + return future; +} +/* + * END of hashmap_entry_fini_fut future + */ + +/* + * BEGIN of hashmap_remove_fut future + */ +struct hashmap_remove_data { + FUTURE_CHAIN_ENTRY(struct hashmap_lookup_lock_entry_fut, + lookup_lock_entry); + FUTURE_CHAIN_ENTRY(struct hashmap_entry_fini_fut, fini_entry); + FUTURE_CHAIN_ENTRY(struct hashmap_entry_set_state_fut, set_state); +}; + +struct hashmap_remove_output { + uint64_t key; +}; + +FUTURE(hashmap_remove_fut, struct hashmap_remove_data, + struct hashmap_remove_output); + +/* + * Maps the 'lookup_lock_entry' future entry data to the 'fini_entry' future + * entry data. 
+ */ +static void +lookup_lock_entry_to_fini_entry_map( + struct future_context *lookup_lock_entry_ctx, + struct future_context *fini_entry_ctx, void *arg) +{ + struct hashmap_lookup_lock_entry_output *lookup_lock_output = + future_context_get_output(lookup_lock_entry_ctx); + struct hashmap_entry_fini_data *fini_data = + future_context_get_data(fini_entry_ctx); + struct hashmap_entry *hme = lookup_lock_output->hme; + + if (hme == NULL) { + /* + * 'lookup_lock_entry' future entry failed to find a hashmap + * entry. 'fini_entry' future entry shouldn't be executed. + */ + fini_entry_ctx->state = FUTURE_STATE_COMPLETE; + } + + fini_data->hme = hme; +} + +/* + * Maps the 'set_state' future entry data to the 'hashmap_remove_fut' chained + * future output data. + */ +static void +set_entry_state_to_output_map_for_remove( + struct future_context *entry_set_state_ctx, + struct future_context *remove_ctx, void *arg) +{ + struct hashmap_entry_set_state_data *entry_set_state_data = + future_context_get_data(entry_set_state_ctx); + struct hashmap_remove_output *remove_output = + future_context_get_output(remove_ctx); + struct hashmap_entry *hme = entry_set_state_data->hme; + remove_output->key = (hme) ? hme->key : 0; +} + +/* + * Lazy initialization function. Initializes the 'set_state' future based on + * the 'fini_entry' future entry result. + */ +static void +set_state_init_for_remove(void *future, + struct future_context *hashmap_remove_ctx, void *arg) +{ + struct hashmap_remove_data *data = + future_context_get_data(hashmap_remove_ctx); + struct hashmap_entry_set_state_fut fut = {.output.changed = 0}; + struct hashmap_entry *hme = data->fini_entry.fut.output.hme; + + if (hme == NULL) { + /* + * 'lookup_lock_entry' future entry failed to find a hashmap + * entry. 'set_state' future entry shouldn't be executed. + */ + FUTURE_INIT_COMPLETE(&fut); + fut.data.hme = NULL; + } else { + /* + * 'fini_entry' future entry was successful. 
Initialize + * the 'set_state' future entry to change the hashmap entry + * state to 'unoccupied'. + */ + fut = hashmap_entry_set_state(hme, + HASHMAP_ENTRY_STATE_LOCKED, + HASHMAP_ENTRY_STATE_UNOCCUPIED); + } + + memcpy(future, &fut, sizeof(fut)); +} + +/* Creates and initializes a new hashmap_remove_fut future */ +static struct hashmap_remove_fut +hashmap_remove(struct hashmap *hm, uint64_t key) +{ + struct hashmap_remove_fut chain; + /* Initialize chained future entries */ + FUTURE_CHAIN_ENTRY_INIT(&chain.data.lookup_lock_entry, + hashmap_lookup_lock_entry(hm, key, + HASHMAP_ENTRY_STATE_PRESENT), + lookup_lock_entry_to_fini_entry_map, NULL); + FUTURE_CHAIN_ENTRY_INIT(&chain.data.fini_entry, + hashmap_entry_fini(hm, NULL), NULL, NULL); + FUTURE_CHAIN_ENTRY_LAZY_INIT(&chain.data.set_state, + set_state_init_for_remove, NULL, + set_entry_state_to_output_map_for_remove, NULL); + /* Set default output value */ + chain.output.key = 0; + + FUTURE_CHAIN_INIT(&chain); + + return chain; +} +/* + * END of hashmap_remove_fut future + */ + +typedef void (*hashmap_cb)(uint64_t key, void *value, void *arg); + +/* Executes callback function for each entry stored in hashmap */ +static void +hashmap_foreach(struct hashmap *hm, hashmap_cb cb, void *arg) +{ + uint64_t key; + void *value; + for (size_t i = 0; i < hm->capacity; i++) { + if (hashmap_entry_unoccupied(&hm->entries[i])) { + continue; + } + + key = hm->entries[i].key; + value = hm->entries[i].value.addr; + + cb(key, value, arg); + } +} + +/* Hashmap callback, prints key-value pair */ +static void +print_entry(uint64_t key, void *value, void *arg) +{ + printf("key: %" PRIu64 ", value: %s\n", key, (char *)value); +} + +int +main(void) +{ + /* Set up the data, create a hashmap instance */ + char val_1[] = "Foo"; + char val_2[] = "Bar"; + char val_3[] = "Fizz"; + char val_4[] = "Buzz"; + char other_val[] = "Coffee"; + + struct hashmap *hm = hashmap_new(4); + + /* Create a runtime instance for efficient future polling */ + 
struct runtime *r = runtime_new(); + + /* Create a thread mover to be used for data move operations */ + struct data_mover_threads *dmt = data_mover_threads_default(); + if (dmt == NULL) { + printf("failed to allocate data mover.\n"); + return 1; + } + + struct vdm *tmover = data_mover_threads_get_vdm(dmt); + + /* + * Populate the hashmap. Create four 'hashmap_put_fut' futures and wait + * for their completion. 'hashmap_put' future implementation uses data + * mover for data copying. + */ + struct hashmap_put_fut put_futs[4]; + put_futs[0] = hashmap_put(tmover, hm, 1, val_1, strlen(val_1) + 1); + put_futs[1] = hashmap_put(tmover, hm, 2, val_2, strlen(val_2) + 1); + put_futs[2] = hashmap_put(tmover, hm, 3, val_3, strlen(val_3) + 1); + put_futs[3] = hashmap_put(tmover, hm, 4, val_4, strlen(val_4) + 1); + + WAIT_FUTURES(r, put_futs, 4); + + /* + * Successful put operation outputs the stored key. Use 'FUTURE_OUTPUT` + * macro to extract each future output and assert that none failed. + */ + struct hashmap_put_output *put_output; + for (int i = 0; i < 4; i++) { + put_output = FUTURE_OUTPUT(&put_futs[i]); + assert(put_output->value != NULL); + } + /* + * At this moment hashmap 'hm' stores four entries with the following + * key-value pairs: (1, "Foo"), (2, "Bar"), (3, "Fizz"), (4, "Buzz"). + */ + + /* Insert another entry into the hashmap, exceeding hashmap capacity */ + put_futs[0] = hashmap_put(tmover, hm, 404, other_val, + strlen(other_val) + 1); + + WAIT_FUTURES(r, put_futs, 1); + + /* Failed insert outputs '0' */ + put_output = FUTURE_OUTPUT(&put_futs[0]); + assert(put_output->value == NULL); + + /* + * Make space in the hashmap. Create two 'hashmap_remove_fut` futures + * and wait for their completion. 
+ */ + struct hashmap_remove_fut remove_futs[2]; + remove_futs[0] = hashmap_remove(hm, 2); + remove_futs[1] = hashmap_remove(hm, 3); + + WAIT_FUTURES(r, remove_futs, 2); + + /* Successful remove operation outputs key of the removed entry */ + struct hashmap_remove_output *remove_output; + for (int i = 0; i < 2; i++) { + remove_output = FUTURE_OUTPUT(&remove_futs[i]); + assert(remove_output->key != 0); + } + /* + * Currently, hashmap 'hm' stores two entries with the following + * key-value pairs: (1, "Foo"), (4, "Buzz"). + */ + + /* Insert two entries with keys already present in the hashmap */ + put_futs[0] = hashmap_put(tmover, hm, 1, other_val, + strlen(other_val) + 1); + put_futs[1] = hashmap_put(tmover, hm, 4, other_val, + strlen(other_val) + 1); + + WAIT_FUTURES(r, put_futs, 2); + + /* Hashmap cannot store entry with duplicated key */ + for (int i = 0; i < 2; i++) { + put_output = FUTURE_OUTPUT(&put_futs[i]); + assert(put_output->value == NULL); + } + + /* + * Get value of the entry with '4' key. Create a 'hashmap_get_fut' + * future and wait for its execution. 
+ */ + size_t buf_size = 32; + char *buf = malloc(buf_size); + + struct hashmap_get_copy_fut get_futs[1]; + get_futs[0] = hashmap_get_copy(tmover, hm, 4, buf, buf_size); + + WAIT_FUTURES(r, get_futs, 1); + + /* Entry with '4' key should store value 'Buzz' */ + struct hashmap_get_copy_output *get_copy_output = + FUTURE_OUTPUT(&get_futs[0]); + assert(get_copy_output->value == buf); + assert(get_copy_output->size == strlen(val_4) + 1); + /* 'hashmap_get_copy_fut' will not copy more data than buffer can fit */ + assert(get_copy_output->copy_size == strlen(val_4) + 1); + printf("copied value: %s\n", buf); + free(buf); + + /* Print key-value pairs of every entry stored in the hashmap */ + hashmap_foreach(hm, print_entry, NULL); + + runtime_delete(r); + hashmap_delete(hm); + /* Avoid unused variable warning */ + (void) put_output; + (void) remove_output; + (void) get_copy_output; + + return 0; +} diff --git a/extras/dml/data_mover_dml.c b/extras/dml/data_mover_dml.c index e76f459f53f..b75e19cdc45 100644 --- a/extras/dml/data_mover_dml.c +++ b/extras/dml/data_mover_dml.c @@ -11,6 +11,12 @@ #include "libminiasync-vdm-dml.h" #define SUPPORTED_FLAGS VDM_F_MEM_DURABLE | VDM_F_NO_CACHE_HINT +/* + * XXX: This flag should be defined in DML header but for some reason isn't + * this flag is needed to guarantee that writes to persistent memory + * are persistent at the time DSA operation is completed + */ +#define DSA_F_DESTINATION_READBACK (1 < 14) struct data_mover_dml { struct vdm base; /* must be first */ @@ -38,9 +44,11 @@ data_mover_dml_translate_flags(uint64_t flags, uint64_t *dml_flags) */ case VDM_F_MEM_DURABLE: *dml_flags |= DML_FLAG_DST1_DURABLE; + *dml_flags |= DSA_F_DESTINATION_READBACK; break; case VDM_F_NO_CACHE_HINT: *dml_flags &= ~DML_FLAG_PREFETCH_CACHE; + break; default: /* shouldn't be possible */ ASSERT(0); } @@ -119,6 +127,24 @@ data_mover_dml_memset_job_init(dml_job_t *dml_job, return dml_job; } +/* + * data_mover_dml_flush_job_init -- initializes new flush 
dml job + */ +static dml_job_t * +data_mover_dml_flush_job_init(dml_job_t *dml_job, + void *dest, size_t n, uint64_t flags) +{ + uint64_t dml_flags = 0; + data_mover_dml_translate_flags(flags, &dml_flags); + + dml_job->operation = DML_OP_CACHE_FLUSH; + dml_job->destination_first_ptr = (uint8_t *)dest; + dml_job->destination_length = n; + dml_job->flags = dml_flags; + + return dml_job; +} + /* * data_mover_dml_job_delete -- delete job struct */ @@ -157,6 +183,7 @@ data_mover_dml_operation_new(struct vdm *vdm, case VDM_OPERATION_MEMCPY: case VDM_OPERATION_MEMMOVE: case VDM_OPERATION_MEMSET: + case VDM_OPERATION_FLUSH: break; default: ASSERT(0); /* unreachable */ @@ -219,6 +246,9 @@ data_mover_dml_operation_delete(void *data, output->type = VDM_OPERATION_MEMSET; output->output.memset.str = job->destination_first_ptr; break; + case DML_OP_CACHE_FLUSH: + output->type = VDM_OPERATION_FLUSH; + break; default: ASSERT(0); } @@ -289,6 +319,13 @@ data_mover_dml_operation_start(void *data, operation->data.memset.flags); data_mover_dml_memory_op_job_submit(job); break; + case VDM_OPERATION_FLUSH: + data_mover_dml_flush_job_init(job, + operation->data.flush.dest, + operation->data.flush.n, + operation->data.flush.flags); + data_mover_dml_memory_op_job_submit(job); + break; default: ASSERT(0); } @@ -296,6 +333,14 @@ data_mover_dml_operation_start(void *data, return 0; } +int +has_property_dmd(void *fut, enum future_property property) +{ + if (property == FUTURE_PROPERTY_ASYNC) + return 1; + return 0; +} + /* * data_mover_dml_vdm -- dml asynchronous memcpy */ @@ -305,6 +350,7 @@ static struct vdm data_mover_dml_vdm = { .op_check = data_mover_dml_operation_check, .op_start = data_mover_dml_operation_start, .capabilities = SUPPORTED_FLAGS, + .has_property = has_property_dmd, }; /* diff --git a/src/core/membuf.c b/src/core/membuf.c index 8b427eab054..629fdabaa5e 100644 --- a/src/core/membuf.c +++ b/src/core/membuf.c @@ -127,6 +127,45 @@ membuf_entry_is_allocated(void *real_ptr) 
return allocated; } +/* + * membuf_threadbuf_prune -- reclaims available buffer space + */ +static void +membuf_threadbuf_prune(struct membuf *membuf, + struct threadbuf *tbuf) +{ + while (tbuf->available != tbuf->size) { + /* reuse leftovers after a wraparound */ + if (tbuf->leftovers != 0 && + (tbuf->size - (tbuf->offset + tbuf->available)) + == tbuf->leftovers) { + tbuf->available += tbuf->leftovers; + tbuf->leftovers = 0; + + continue; + } + + /* check the next object after the available memory */ + size_t next_loc = (tbuf->offset + tbuf->available) % tbuf->size; + void *next = &tbuf->buf[next_loc]; + if (membuf_entry_is_allocated(next)) + return; + + tbuf->available += membuf_entry_get_size(next); + } +} + +/* + * tbuf_check_safe_for_reuse -- verifies if the thread buffer doesn't contain + * any live allocations that would prevent it from being reused. + */ +static int +tbuf_check_safe_for_reuse(struct threadbuf *tbuf) +{ + membuf_threadbuf_prune(tbuf->membuf, tbuf); + return tbuf->available == tbuf->size; +} + /* * membuf_get_threadbuf -- returns thread-local buffer for allocations */ @@ -139,8 +178,8 @@ membuf_get_threadbuf(struct membuf *membuf) os_mutex_lock(&membuf->lists_lock); - if (membuf->tbuf_unused_first != NULL) { - tbuf = membuf->tbuf_unused_first; + tbuf = membuf->tbuf_unused_first; + if (tbuf != NULL && tbuf_check_safe_for_reuse(tbuf)) { membuf->tbuf_unused_first = tbuf->unused_next; } else { /* @@ -171,34 +210,6 @@ membuf_get_threadbuf(struct membuf *membuf) return tbuf; } -/* - * membuf_threadbuf_prune -- reclaims available buffer space - */ -static void -membuf_threadbuf_prune(struct membuf *membuf, - struct threadbuf *tbuf) -{ - while (tbuf->available != tbuf->size) { - /* reuse leftovers after a wraparound */ - if (tbuf->leftovers != 0 && - (tbuf->size - (tbuf->offset + tbuf->available)) - == tbuf->leftovers) { - tbuf->available += tbuf->leftovers; - tbuf->leftovers = 0; - - continue; - } - - /* check the next object after the available 
memory */ - size_t next_loc = (tbuf->offset + tbuf->available) % tbuf->size; - void *next = &tbuf->buf[next_loc]; - if (membuf_entry_is_allocated(next)) - return; - - tbuf->available += membuf_entry_get_size(next); - } -} - /* * membuf_alloc -- allocate linearly from the available memory location. */ diff --git a/src/core/ringbuf.c b/src/core/ringbuf.c index d834d865b95..e0daf3861b2 100644 --- a/src/core/ringbuf.c +++ b/src/core/ringbuf.c @@ -225,8 +225,9 @@ ringbuf_dequeue_atomic(struct ringbuf *rbuf) VALGRIND_ANNOTATE_HAPPENS_AFTER(&rbuf->data[r]); do { - while ((data = rbuf->data[r]) == NULL) - __sync_synchronize(); + do { + util_atomic_load64(&rbuf->data[r], &data); + } while (data == NULL); } while (!util_bool_compare_and_swap64(&rbuf->data[r], data, NULL)); return data; diff --git a/src/core/util.h b/src/core/util.h index acd45aac773..f866d362644 100644 --- a/src/core/util.h +++ b/src/core/util.h @@ -62,15 +62,6 @@ extern "C" { extern unsigned long long Pagesize; extern unsigned long long Mmap_align; -#if defined(__x86_64) || defined(_M_X64) || defined(__aarch64__) || \ - defined(__riscv) -#define CACHELINE_SIZE 64ULL -#elif defined(__PPC64__) -#define CACHELINE_SIZE 128ULL -#else -#error unable to recognize architecture at compile time -#endif - #define PAGE_ALIGNED_DOWN_SIZE(size) ((size) & ~(Pagesize - 1)) #define PAGE_ALIGNED_UP_SIZE(size)\ PAGE_ALIGNED_DOWN_SIZE((size) + (Pagesize - 1)) diff --git a/src/data_mover_sync.c b/src/data_mover_sync.c index 0f4ceef1f2a..d43a2fa3954 100644 --- a/src/data_mover_sync.c +++ b/src/data_mover_sync.c @@ -121,6 +121,10 @@ sync_operation_start(void *data, const struct vdm_operation *operation, operation->data.memset.c, operation->data.memset.n); break; + case VDM_OPERATION_FLUSH: + printf("flush operation not implemented " + "for sync data mover"); + exit(1); default: ASSERT(0); } @@ -137,6 +141,7 @@ static struct vdm data_mover_sync_vdm = { .op_check = sync_operation_check, .op_start = sync_operation_start, 
.capabilities = SUPPORTED_FLAGS, + .has_property = NULL, }; /* diff --git a/src/data_mover_threads.c b/src/data_mover_threads.c index 5350ce651b9..813bb222ca9 100644 --- a/src/data_mover_threads.c +++ b/src/data_mover_threads.c @@ -118,6 +118,10 @@ data_mover_threads_do_operation(struct data_mover_threads_data *data, op_memset(mdata->str, mdata->c, mdata->n, (unsigned)mdata->flags); } break; + case VDM_OPERATION_FLUSH: + printf("flush operation not implemented " + "for threads data mover"); + exit(1); default: ASSERT(0); /* unreachable */ break; @@ -268,12 +272,21 @@ data_mover_threads_operation_start(void *data, return 0; } +int +has_property_dmt(void *fut, enum future_property property) +{ + if (property == FUTURE_PROPERTY_ASYNC) + return 1; + return 0; +} + static struct vdm data_mover_threads_vdm = { .op_new = data_mover_threads_operation_new, .op_delete = data_mover_threads_operation_delete, .op_check = data_mover_threads_operation_check, .op_start = data_mover_threads_operation_start, .capabilities = SUPPORTED_FLAGS, + .has_property = has_property_dmt, }; /* diff --git a/src/include/libminiasync/future.h b/src/include/libminiasync/future.h index d20f582041e..de396adeb9d 100644 --- a/src/include/libminiasync/future.h +++ b/src/include/libminiasync/future.h @@ -115,6 +115,10 @@ struct future_notifier { uint32_t padding; }; +enum future_property { + FUTURE_PROPERTY_ASYNC, +}; + static inline void * future_context_get_data(struct future_context *context) { @@ -138,9 +142,12 @@ future_context_get_size(struct future_context *context) typedef enum future_state (*future_task_fn)(struct future_context *context, struct future_notifier *notifier); +typedef int (*future_has_property_fn)(void *future, + enum future_property property); struct future { future_task_fn task; + future_has_property_fn has_property; struct future_context context; }; @@ -151,9 +158,10 @@ struct future { _output_type output;\ } -#define FUTURE_INIT(_futurep, _taskfn)\ +#define 
FUTURE_INIT_EXT(_futurep, _taskfn, _propertyfn)\ do {\ (_futurep)->base.task = (_taskfn);\ + (_futurep)->base.has_property = (_propertyfn);\ (_futurep)->base.context.state = (FUTURE_STATE_IDLE);\ (_futurep)->base.context.data_size = sizeof((_futurep)->data);\ (_futurep)->base.context.output_size =\ @@ -161,9 +169,13 @@ do {\ (_futurep)->base.context.padding = 0;\ } while (0) +#define FUTURE_INIT(_futurep, _taskfn)\ +FUTURE_INIT_EXT((_futurep), (_taskfn), future_has_property_default) + #define FUTURE_INIT_COMPLETE(_futurep)\ do {\ (_futurep)->base.task = NULL;\ + (_futurep)->base.has_property = NULL;\ (_futurep)->base.context.state = (FUTURE_STATE_COMPLETE);\ (_futurep)->base.context.data_size = sizeof((_futurep)->data);\ (_futurep)->base.context.output_size =\ @@ -253,6 +265,9 @@ FUTURE_CHAIN_ENTRY_HAS_FLAG(_entry, FUTURE_CHAIN_FLAG_ENTRY_LAST) #define FUTURE_CHAIN_ENTRY_IS_PROCESSED(_entry)\ FUTURE_CHAIN_ENTRY_HAS_FLAG(_entry, FUTURE_CHAIN_FLAG_ENTRY_PROCESSED) +#define FUTURE_CHAIN_ENTRY_IS_INITIALIZED(_entry)\ +((_entry)->init == NULL) + /* * TODO: Notifiers have to be copied into the state of the future, so we might * consider just passing it by copy here... Needs to be evaluated for @@ -268,17 +283,57 @@ future_poll(struct future *fut, struct future_notifier *notifier) return fut->context.state; } +/* + * future_has_property -- returns 1 if a property is set and 0 otherwise. + * It's an abstract implementation, which works for both regular and + * chained futures. 
+static inline int
+future_has_property(struct future *fut, enum future_property property)
+{
+	return fut->has_property ? fut->has_property(fut, property) : 0;
+}
*/ while (entry != NULL) { - if (entry->init) { - entry->init(&entry->future, ctx, entry->init_arg); - entry->init = NULL; - } - /* - * `struct future` starts with a pointer, so the structure will - * be pointer-size aligned. We need to account for that when - * calculating where is the next future in a chained struct. - */ - used_data += _MINIASYNC_ALIGN_UP( - sizeof(struct future_chain_entry) + - future_context_get_size(&entry->future.context)); - struct future_chain_entry *next = NULL; - if (!FUTURE_CHAIN_ENTRY_IS_LAST(entry) && - used_data != ctx->data_size) { - next = (struct future_chain_entry *)(data + used_data); - } + struct future_chain_entry *next = + get_next_future_chain_entry(ctx, entry, + data, &used_data); if (!FUTURE_CHAIN_ENTRY_IS_PROCESSED(entry)) { if (future_poll(&entry->future, notifier) == FUTURE_STATE_COMPLETE) { @@ -330,14 +371,48 @@ async_chain_impl(struct future_context *ctx, struct future_notifier *notifier) } entry = next; } -#undef _MINIASYNC_PTRSIZE -#undef _MINIASYNC_ALIGN_UP return FUTURE_STATE_COMPLETE; } +static inline int +future_has_property_default(void *future, enum future_property property) +{ + /* suppres unused parameters */ + (void) (future); + (void) (property); + /* by default every property is set to false */ + return 0; +} + +static inline int +future_chain_has_property(void *future, enum future_property property) +{ + struct future *fut = future; + struct future_context *ctx = &fut->context; + uint8_t *data = (uint8_t *)future_context_get_data(ctx); + struct future_chain_entry *entry = (struct future_chain_entry *)(data); + + size_t used_data = 0; + + while (entry != NULL) { + struct future_chain_entry *next = + get_next_future_chain_entry(ctx, entry, + data, &used_data); + if (!FUTURE_CHAIN_ENTRY_IS_PROCESSED(entry)) { + if ((entry->future.has_property(&entry->future, + property))) + return 1; + return 0; + } + entry = next; + } + + return -1; +} + #define FUTURE_CHAIN_INIT(_futurep)\ -FUTURE_INIT((_futurep), 
async_chain_impl) +FUTURE_INIT_EXT((_futurep), async_chain_impl, future_chain_has_property) #ifdef __cplusplus } diff --git a/src/include/libminiasync/vdm.h b/src/include/libminiasync/vdm.h index 609b0dfcf7d..b86092f35c0 100644 --- a/src/include/libminiasync/vdm.h +++ b/src/include/libminiasync/vdm.h @@ -34,6 +34,7 @@ enum vdm_operation_type { VDM_OPERATION_MEMCPY, VDM_OPERATION_MEMMOVE, VDM_OPERATION_MEMSET, + VDM_OPERATION_FLUSH, }; enum vdm_operation_result { @@ -63,6 +64,12 @@ struct vdm_operation_data_memset { uint64_t flags; }; +struct vdm_operation_data_flush { + void *dest; + size_t n; + uint64_t flags; +}; + /* sized so that sizeof(vdm_operation_data) is 64 */ #define VDM_OPERATION_DATA_MAX_SIZE (40) @@ -71,6 +78,7 @@ struct vdm_operation { struct vdm_operation_data_memcpy memcpy; struct vdm_operation_data_memmove memmove; struct vdm_operation_data_memset memset; + struct vdm_operation_data_flush flush; uint8_t data[VDM_OPERATION_DATA_MAX_SIZE]; } data; enum vdm_operation_type type; @@ -95,6 +103,10 @@ struct vdm_operation_output_memset { void *str; }; +struct vdm_operation_output_flush { + uint64_t unused; +}; + struct vdm_operation_output { enum vdm_operation_type type; enum vdm_operation_result result; @@ -102,6 +114,7 @@ struct vdm_operation_output { struct vdm_operation_output_memcpy memcpy; struct vdm_operation_output_memmove memmove; struct vdm_operation_output_memset memset; + struct vdm_operation_output_flush flush; } output; }; @@ -125,6 +138,7 @@ struct vdm { vdm_operation_start op_start; vdm_operation_check op_check; unsigned capabilities; + future_has_property_fn has_property; }; struct vdm *vdm_synchronous_new(void); @@ -176,6 +190,18 @@ vdm_is_supported(struct vdm *vdm, unsigned capability) return (vdm->capabilities && capability) == capability; } +/* + * vdm_set_has_property_fn -- set a custom has_property function for + * a concrete future + */ +static inline void +vdm_set_has_property_fn(struct vdm_operation_future *future, + 
int(*has_property)(void *future, enum future_property property)) +{ + if (has_property != NULL) + future->base.has_property = has_property; +} + /* * vdm_generic_operation -- creates a new vdm future for a given generic * operation @@ -192,6 +218,12 @@ vdm_generic_operation(struct vdm *vdm, struct vdm_operation_future *future) } else { FUTURE_INIT(future, vdm_operation_impl); } + + /* + * set has_property function for the concrete future based + * on the implementation provided by the concrete data mover + */ + vdm_set_has_property_fn(future, vdm->has_property); } /* @@ -260,6 +292,26 @@ vdm_memset(struct vdm *vdm, void *str, int c, size_t n, uint64_t flags) return future; } +/* + * vdm_flush -- instantiates a new flush vdm operation and returns a new + * future to represent that operation + */ +static inline struct vdm_operation_future +vdm_flush(struct vdm *vdm, void *dest, size_t n, uint64_t flags) +{ + struct vdm_operation_future future; + future.data.operation.type = VDM_OPERATION_FLUSH; + future.data.operation.data.flush.dest = dest; + future.data.operation.data.flush.flags = flags; + future.data.operation.data.flush.n = n; + future.data.operation.padding = 0; + future.output.type = VDM_OPERATION_FLUSH; + future.output.result = VDM_SUCCESS; + + vdm_generic_operation(vdm, &future); + return future; +} + #ifdef __cplusplus } #endif diff --git a/src/runtime.c b/src/runtime.c index beb648aedcd..ffac259f16e 100644 --- a/src/runtime.c +++ b/src/runtime.c @@ -67,6 +67,22 @@ runtime_sleep(struct runtime *runtime) os_mutex_unlock(&runtime->lock); } +int +future_compare_async(const void *first_fut, const void *second_fut) +{ + enum future_property property = FUTURE_PROPERTY_ASYNC; + struct future *fut1 = *(struct future **)first_fut; + struct future *fut2 = *(struct future **)second_fut; + + if (future_has_property(fut1, property) == + future_has_property(fut2, property)) + return 0; + if (future_has_property(fut1, property) > + future_has_property(fut2, property)) + 
return -1; + return 1; +} + void runtime_wait_multiple(struct runtime *runtime, struct future *futs[], size_t nfuts) @@ -79,8 +95,11 @@ runtime_wait_multiple(struct runtime *runtime, struct future *futs[], notifier.waker = (struct future_waker){&waker_data, runtime_waker_wake}; notifier.poller.ptr_to_monitor = NULL; size_t ndone = 0; + for (;;) { for (uint64_t i = 0; i < runtime->spins_before_sleep; ++i) { + qsort(futs, nfuts, sizeof(struct future *), + future_compare_async); for (uint64_t f = 0; f < nfuts; ++f) { struct future *fut = futs[f]; if (fut->context.state == FUTURE_STATE_COMPLETE) @@ -104,6 +123,7 @@ runtime_wait_multiple(struct runtime *runtime, struct future *futs[], break; }; } + if (ndone == nfuts) return; diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 2263cfcf155..40de4fa0998 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -42,6 +42,10 @@ set(SOURCES_DATA_MOVER_DML_TEST_MEMSET data_mover_dml_memset/data_mover_dml_memset.c ${SOURCES_UTIL_DML}) +set(SOURCES_DATA_MOVER_DML_TEST_FLUSH + data_mover_dml_flush/data_mover_dml_flush.c + ${MINIASYNC_DML_SOURCE_DIR}/utils/util_dml.c) + set(SOURCES_MEMBUF_TEST membuf/membuf_simple.c) @@ -64,6 +68,12 @@ set(SOURCES_MEMMOVE_THREADS_TEST set(SOURCES_MEMSET_THREADS_TEST memset_threads/memset_threads.c) +set(SOURCES_RUNTIME_TEST + runtime_test/runtime_test.c) + +set(SOURCES_FUTURE_PROPERTIES_TEST + future_properties/future_property_async.c) + add_custom_target(tests) add_flag(-Wall) @@ -124,6 +134,10 @@ add_link_executable(memset_threads "${SOURCES_MEMSET_THREADS_TEST}" "${LIBS_BASIC}") +add_link_executable(future_property_async + "${SOURCES_FUTURE_PROPERTIES_TEST}" + "${LIBS_BASIC}") + # add test using test function defined in the ctest_helpers.cmake file test("dummy" "dummy" test_dummy none) test("dummy_drd" "dummy" test_dummy drd) @@ -139,11 +153,13 @@ test("vdm" "vdm" test_vdm none) test("memset_sync" "memset_sync" test_memset_sync none) test("memmove_threads" "memmove_threads" 
test_memmove_threads none) test("memset_threads" "memset_threads" test_memset_threads none) +test("future_properties" "future_properties" test_future_properties none) # add tests running examples only if they are built if(BUILD_EXAMPLES) test("ex_basic" "ex_basic" test_ex_basic none) test("ex_basic_async" "ex_basic_async" test_ex_basic_async none) + test("ex_hashmap" "ex_hashmap" test_ex_hashmap none) endif() # add miniasync-vdm-dml test only if the sources in extras/dml were compiled @@ -164,8 +180,18 @@ if (COMPILE_DML) "${SOURCES_VDM_OPERATION_FUTURE_POLL}" "${LIBS_DML}") + add_link_executable(data_mover_dml_flush + "${SOURCES_DATA_MOVER_DML_TEST_FLUSH}" + "${LIBS_DML}") + + add_link_executable(runtime_test + "${SOURCES_RUNTIME_TEST}" + "${LIBS_DML}") + test("data_mover_dml_memcpy" "data_mover_dml_memcpy" test_data_mover_dml_memcpy none) test("data_mover_dml_memmove" "data_mover_dml_memmove" test_data_mover_dml_memmove none) test("data_mover_dml_memset" "data_mover_dml_memset" test_data_mover_dml_memset none) + test("data_mover_dml_flush" "data_mover_dml_flush" test_data_mover_dml_flush none) test("vdm_operation_future_poll" "vdm_operation_future_poll" test_vdm_operation_future_poll none) + test("runtime_test" "runtime_test" test_runtime none) endif() diff --git a/tests/data_mover_dml_flush/data_mover_dml_flush.c b/tests/data_mover_dml_flush/data_mover_dml_flush.c new file mode 100644 index 00000000000..5822b52c097 --- /dev/null +++ b/tests/data_mover_dml_flush/data_mover_dml_flush.c @@ -0,0 +1,64 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2022, Intel Corporation */ + +#include +#include +#include + +#include +#include +#include "test_helpers.h" +#include "util_dml.h" + +#define ARRAY_SIZE 50 + +static int +dml_flush(enum data_mover_dml_type type, uint64_t flags) +{ + struct runtime *r = runtime_new(); + + struct data_mover_dml *dmd = data_mover_dml_new(type); + struct vdm *dml_mover_async = data_mover_dml_get_vdm(dmd); + + uint8_t 
destination_array[ARRAY_SIZE]; + + struct vdm_operation_future a_to_b = vdm_flush(dml_mover_async, + destination_array, ARRAY_SIZE, flags); + + runtime_wait(r, FUTURE_AS_RUNNABLE(&a_to_b)); + + data_mover_dml_delete(dmd); + + runtime_delete(r); + + return 0; +} + +static int +test_dml_basic_flush() +{ + return dml_flush(DATA_MOVER_DML_SOFTWARE, 0); +} + +static int +test_dml_hw_path_flag_flush() +{ + return dml_flush(DATA_MOVER_DML_HARDWARE, 0); +} + +int +main(void) +{ + int ret = test_dml_basic_flush(); + if (ret) + return ret; + if (util_dml_check_hw_available() == 0) { + ret = test_dml_hw_path_flag_flush(); + if (ret) + return ret; + } else { + UT_LOG_SKIP("test_dml_hw_path_flag_flush"); + } + + return 0; +} diff --git a/tests/data_mover_dml_flush/test_data_mover_dml_flush.cmake b/tests/data_mover_dml_flush/test_data_mover_dml_flush.cmake new file mode 100644 index 00000000000..1b2e63d37fd --- /dev/null +++ b/tests/data_mover_dml_flush/test_data_mover_dml_flush.cmake @@ -0,0 +1,22 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright 2022, Intel Corporation + +# test case for the flush operation with the DML data mover + +include(${SRC_DIR}/cmake/test_helpers.cmake) + +setup() + +# check for MOVDIR64B instruction +check_movdir64b() + +# inform that some test cases involving 'movdir64b' instruction will be skipped +if (MOVDIR64B EQUAL 0) + message(STATUS "movdir64b instruction not available, some test cases will be skipped!") +endif() + +execute(0 ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${BUILD}/data_mover_dml_flush) + +execute_assert_pass(${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${BUILD}/data_mover_dml_flush) + +cleanup() diff --git a/tests/data_mover_dml_memcpy/test_data_mover_dml_memcpy.cmake b/tests/data_mover_dml_memcpy/test_data_mover_dml_memcpy.cmake index 8a060124846..b5bc3479638 100644 --- a/tests/data_mover_dml_memcpy/test_data_mover_dml_memcpy.cmake +++ b/tests/data_mover_dml_memcpy/test_data_mover_dml_memcpy.cmake @@ -1,7 +1,7 @@ # SPDX-License-Identifier: 
BSD-3-Clause # Copyright 2021-2022, Intel Corporation -# an example for the data_mover_dml test case +# test case for the memcpy operation with the DML data mover include(${SRC_DIR}/cmake/test_helpers.cmake) @@ -10,7 +10,7 @@ setup() # check for MOVDIR64B instruction check_movdir64b() -# inform that some test cases involving 'mvodir64b' instruction will be skipped +# inform that some test cases involving 'movdir64b' instruction will be skipped if (MOVDIR64B EQUAL 0) message(STATUS "movdir64b instruction not available, some test cases will be skipped!") endif() diff --git a/tests/data_mover_dml_memmove/test_data_mover_dml_memmove.cmake b/tests/data_mover_dml_memmove/test_data_mover_dml_memmove.cmake index fdd40f4c230..40b662682f3 100644 --- a/tests/data_mover_dml_memmove/test_data_mover_dml_memmove.cmake +++ b/tests/data_mover_dml_memmove/test_data_mover_dml_memmove.cmake @@ -1,7 +1,7 @@ # SPDX-License-Identifier: BSD-3-Clause # Copyright 2022, Intel Corporation -# an example for the data_mover_dml test case +# test case for the memmove operation with the DML data mover include(${SRC_DIR}/cmake/test_helpers.cmake) @@ -10,7 +10,7 @@ setup() # check for MOVDIR64B instruction check_movdir64b() -# inform that some test cases involving 'mvodir64b' instruction will be skipped +# inform that some test cases involving 'movdir64b' instruction will be skipped if (MOVDIR64B EQUAL 0) message(STATUS "movdir64b instruction not available, some test cases will be skipped!") endif() diff --git a/tests/data_mover_dml_memset/test_data_mover_dml_memset.cmake b/tests/data_mover_dml_memset/test_data_mover_dml_memset.cmake index dfcc3bac0c9..78459ce51c3 100644 --- a/tests/data_mover_dml_memset/test_data_mover_dml_memset.cmake +++ b/tests/data_mover_dml_memset/test_data_mover_dml_memset.cmake @@ -1,7 +1,7 @@ # SPDX-License-Identifier: BSD-3-Clause # Copyright 2022, Intel Corporation -# an example for the data_mover_dml test case +# test case for the memset operation with the DML data mover 
include(${SRC_DIR}/cmake/test_helpers.cmake) @@ -10,7 +10,7 @@ setup() # check for MOVDIR64B instruction check_movdir64b() -# inform that some test cases involving 'mvodir64b' instruction will be skipped +# inform that some test cases involving 'movdir64b' instruction will be skipped if (MOVDIR64B EQUAL 0) message(STATUS "movdir64b instruction not available, some test cases will be skipped!") endif() diff --git a/tests/ex_hashmap/test_ex_hashmap.cmake b/tests/ex_hashmap/test_ex_hashmap.cmake new file mode 100644 index 00000000000..da7f72d8f2f --- /dev/null +++ b/tests/ex_hashmap/test_ex_hashmap.cmake @@ -0,0 +1,12 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright 2022, Intel Corporation + +# test for the hashmap example + +include(${SRC_DIR}/cmake/test_helpers.cmake) + +setup() + +execute(0 ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${BUILD}/example-hashmap) + +cleanup() diff --git a/tests/future/test_future.c b/tests/future/test_future.c index ae44d6fe014..6f350b79edd 100644 --- a/tests/future/test_future.c +++ b/tests/future/test_future.c @@ -4,6 +4,7 @@ #include "libminiasync/future.h" #include "test_helpers.h" #include "core/util.h" +#include #include #include #include @@ -177,6 +178,10 @@ async_up_down(int count) down_to_result_map, FAKE_MAP_ARG); FUTURE_CHAIN_INIT(&fut); + /* regular chain entry init: all entries are initialized right away */ + UT_ASSERTeq(FUTURE_CHAIN_ENTRY_IS_INITIALIZED(&fut.data.up), true); + UT_ASSERTeq(FUTURE_CHAIN_ENTRY_IS_INITIALIZED(&fut.data.down), true); + return fut; } @@ -290,8 +295,17 @@ void test_lazy_init() { struct multiply_up_down_fut fut = async_multiply_up_down(5, 5); + UT_ASSERTeq(FUTURE_CHAIN_ENTRY_IS_INITIALIZED(&fut.data.mul), false); + UT_ASSERTeq(FUTURE_CHAIN_ENTRY_IS_INITIALIZED(&fut.data.up_down), + false); + while (future_poll(FUTURE_AS_RUNNABLE(&fut), FAKE_NOTIFIER) != FUTURE_STATE_COMPLETE) { WAIT(); } + + /* we can assume, after polling, that all entries are initialized */ + 
UT_ASSERTeq(FUTURE_CHAIN_ENTRY_IS_INITIALIZED(&fut.data.mul), true); + UT_ASSERTeq(FUTURE_CHAIN_ENTRY_IS_INITIALIZED(&fut.data.up_down), true); + struct multiply_up_down_output *mud_output = FUTURE_OUTPUT(&fut); UT_ASSERTeq(mud_output->result_sum, 2); struct multiply_up_down_data *mud_data = FUTURE_DATA(&fut); diff --git a/tests/future/test_future.cmake b/tests/future/test_future.cmake index 632f9906586..173da86083e 100644 --- a/tests/future/test_future.cmake +++ b/tests/future/test_future.cmake @@ -1,19 +1,14 @@ # SPDX-License-Identifier: BSD-3-Clause # Copyright 2022, Intel Corporation -# an example for the basic test case +# test for the future type include(${SRC_DIR}/cmake/test_helpers.cmake) setup() -# The expected input can be provided to the execute function, -# which will check if the return code of the binary file matches. execute(0 ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${BUILD}/future) -# execute_assert_pass can be used instead of manually checking if -# the return code equals zero, however it cannot be used with a tracer. -# When used with a tracer assert_pass will be skipped. 
execute_assert_pass(${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${BUILD}/future) cleanup() diff --git a/tests/future_properties/future_property_async.c b/tests/future_properties/future_property_async.c new file mode 100644 index 00000000000..ffd66b8cfe5 --- /dev/null +++ b/tests/future_properties/future_property_async.c @@ -0,0 +1,278 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2022, Intel Corporation */ + +#include "libminiasync.h" +#include "test_helpers.h" +#include "core/util.h" +#include +#include +#include +#include + +#define TEST_MAX_COUNT 20 +#define FAKE_MAP_ARG ((void *)((uintptr_t)(0xFEEDCAFE))) + +static uint64_t results[12]; +static uint64_t results_index = 0; + +struct countup_data { + int counter; + int max_count; + uint64_t future_id; +}; + +struct countup_output { + int result; +}; + +int +future_async_property(void *fut, enum future_property property) +{ + if (property == FUTURE_PROPERTY_ASYNC) + return 1; + return 0; +} + +FUTURE(countup_fut, struct countup_data, struct countup_output); + +enum future_state +countup_task(struct future_context *context, + struct future_notifier *notifier) +{ + struct countup_data *data = future_context_get_data(context); + data->counter++; + if (data->counter == data->max_count) { + struct countup_output *output = + future_context_get_output(context); + output->result += 1; + results[results_index] = data->future_id; + results_index++; + return FUTURE_STATE_COMPLETE; + } else { + return FUTURE_STATE_RUNNING; + } +} + +struct countup_fut +countup_async(int max_count, uint64_t future_id) +{ + struct countup_fut fut = {.output.result = 0}; + FUTURE_INIT(&fut, countup_task); + fut.data.counter = 0; + fut.data.max_count = max_count; + fut.output.result = 0; + fut.data.future_id = future_id; + fut.base.has_property = future_async_property; + + return fut; +} +struct countup_fut +countup_non_async(int max_count, uint64_t future_id) +{ + struct countup_fut fut = {.output.result = 0}; + FUTURE_INIT(&fut, 
countup_task); + fut.data.counter = 0; + fut.data.max_count = max_count; + fut.output.result = 0; + fut.data.future_id = future_id; + + return fut; +} + +/* + * test_basic_futures -- tests if basic async/non async futures + * are executed in the correct order by runtime. + */ +void +test_basic_futures(void) +{ + struct runtime *r = runtime_new(); + struct future **futures = malloc(sizeof(struct future *) * 3); + struct countup_fut up1 = countup_async(TEST_MAX_COUNT, 1); + UT_ASSERTeq(FUTURE_STATE(&up1), FUTURE_STATE_IDLE); + + struct countup_fut up2 = countup_non_async(TEST_MAX_COUNT, 2); + UT_ASSERTeq(FUTURE_STATE(&up2), FUTURE_STATE_IDLE); + + struct countup_fut up3 = countup_non_async(TEST_MAX_COUNT, 3); + UT_ASSERTeq(FUTURE_STATE(&up3), FUTURE_STATE_IDLE); + + futures[0] = FUTURE_AS_RUNNABLE(&up2); + futures[1] = FUTURE_AS_RUNNABLE(&up1); + futures[2] = FUTURE_AS_RUNNABLE(&up3); + + runtime_wait_multiple(r, futures, 3); + + UT_ASSERTeq(results[0], 1); + UT_ASSERTeq(results[1], 2); + UT_ASSERTeq(results[2], 3); + + free(futures); + runtime_delete(r); +} + +struct chained_up_fut_data { + FUTURE_CHAIN_ENTRY(struct countup_fut, up1); + FUTURE_CHAIN_ENTRY(struct countup_fut, up2); +}; + +struct chained_up_fut_output { + int result_sum; +}; + +FUTURE(chained_up_fut, struct chained_up_fut_data, + struct chained_up_fut_output); + +void +up1_to_up2_map(struct future_context *lhs, struct future_context *rhs, + void *arg) +{ + struct countup_output *up1_output = future_context_get_data(lhs); + struct countup_output *up2_output = future_context_get_output(rhs); + up2_output->result += up1_output->result; +} + +void +up2_to_result_map(struct future_context *lhs, struct future_context *rhs, + void *arg) +{ + struct countup_output *up2_output = future_context_get_output(lhs); + struct chained_up_fut_output *output = future_context_get_output(rhs); + output->result_sum = up2_output->result; +} + +struct chained_up_fut +countup_chained_sync_async(int count, uint64_t id_fut1, 
uint64_t id_fut2) +{ + struct chained_up_fut fut = {.output.result_sum = 0}; + FUTURE_CHAIN_ENTRY_INIT(&fut.data.up1, + countup_non_async(count, id_fut1), + up1_to_up2_map, FAKE_MAP_ARG); + FUTURE_CHAIN_ENTRY_INIT(&fut.data.up2, + countup_async(count, id_fut2), + up2_to_result_map, FAKE_MAP_ARG); + FUTURE_CHAIN_INIT(&fut); + + return fut; +} + +struct chained_up_fut +countup_chained_async_sync(int count, uint64_t id_fut1, uint64_t id_fut2) +{ + struct chained_up_fut fut = {.output.result_sum = 0}; + FUTURE_CHAIN_ENTRY_INIT(&fut.data.up1, countup_async(count, id_fut1), + up1_to_up2_map, FAKE_MAP_ARG); + FUTURE_CHAIN_ENTRY_INIT(&fut.data.up2, + countup_non_async(count, id_fut2), + up2_to_result_map, FAKE_MAP_ARG); + FUTURE_CHAIN_INIT(&fut); + + return fut; +} + +/* + * test_chained_futures -- tests if async/non async chained futures' + * entires are executed in the correct order by runtime. + */ +void +test_chained_future() +{ + struct runtime *r = runtime_new(); + struct future **futures = malloc(sizeof(struct future *) * 3); + struct chained_up_fut fut1 = + countup_chained_sync_async(TEST_MAX_COUNT, 3, 4); + struct chained_up_fut fut2 = + countup_chained_async_sync(TEST_MAX_COUNT, 5, 6); + struct chained_up_fut fut3 = + countup_chained_sync_async(TEST_MAX_COUNT, 7, 8); + + futures[0] = FUTURE_AS_RUNNABLE(&fut1); + futures[1] = FUTURE_AS_RUNNABLE(&fut2); + futures[2] = FUTURE_AS_RUNNABLE(&fut3); + + runtime_wait_multiple(r, futures, 3); + + UT_ASSERTeq(results[3], 5); + UT_ASSERTeq(results[4], 3); + UT_ASSERTeq(results[5], 7); + UT_ASSERTeq(results[6], 4); + UT_ASSERTeq(results[7], 8); + UT_ASSERTeq(results[8], 6); + + free(futures); + runtime_delete(r); +} + +struct change_flag_fut_data { + FUTURE_CHAIN_ENTRY(struct countup_fut, up1); +}; + +struct change_flag_fut_output { + int result_sum; +}; + +FUTURE(change_flag_fut, struct change_flag_fut_data, + struct change_flag_fut_output); + +void +up_to_result_map_change_flag(struct future_context *lhs, + struct 
future_context *rhs, void *arg) +{ + struct countup_fut *fut = arg; + struct countup_output *up1_output = future_context_get_output(lhs); + struct change_flag_fut_output *output = future_context_get_output(rhs); + if (output->result_sum == 10) { + fut->base.has_property = future_async_property; + } + output->result_sum = up1_output->result; +} + +struct change_flag_fut +countup_change_flag(int count, uint64_t id_fut) +{ + struct change_flag_fut fut = {.output.result_sum = 0}; + FUTURE_CHAIN_ENTRY_INIT(&fut.data.up1, + countup_non_async(count, id_fut), + up1_to_up2_map, FAKE_MAP_ARG); + FUTURE_CHAIN_INIT(&fut); + + return fut; +} + +/* + * test_change_flag_future -- tests if runtime executes + * async/non async futures correctly, when a future changes its + * async flag during execution. + */ +void +test_change_flag_future() +{ + struct runtime *r = runtime_new(); + struct future **futures = malloc(sizeof(struct future *) * 3); + struct change_flag_fut fut1 = countup_change_flag(TEST_MAX_COUNT, 9); + struct countup_fut fut2 = countup_async(TEST_MAX_COUNT, 10); + struct countup_fut fut3 = countup_non_async(TEST_MAX_COUNT, 11); + + futures[0] = FUTURE_AS_RUNNABLE(&fut1); + futures[1] = FUTURE_AS_RUNNABLE(&fut2); + futures[2] = FUTURE_AS_RUNNABLE(&fut3); + + runtime_wait_multiple(r, futures, 3); + + UT_ASSERTeq(results[9], 10); + UT_ASSERTeq(results[10], 9); + UT_ASSERTeq(results[11], 11); + + free(futures); + runtime_delete(r); +} + +int +main(void) +{ + test_basic_futures(); + test_chained_future(); + test_change_flag_future(); + + return 0; +} diff --git a/tests/future_properties/test_future_properties.cmake b/tests/future_properties/test_future_properties.cmake new file mode 100644 index 00000000000..c626318fd62 --- /dev/null +++ b/tests/future_properties/test_future_properties.cmake @@ -0,0 +1,12 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright 2022, Intel Corporation + +include(${SRC_DIR}/cmake/test_helpers.cmake) + +setup() + +execute(0 
${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${BUILD}/future_property_async) + +execute_assert_pass(${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${BUILD}/future_property_async) + +cleanup() diff --git a/tests/membuf/membuf_simple.c b/tests/membuf/membuf_simple.c index e5ea5d98881..d6434c76d67 100644 --- a/tests/membuf/membuf_simple.c +++ b/tests/membuf/membuf_simple.c @@ -4,6 +4,7 @@ #include #include #include "core/membuf.h" +#include "os_thread.h" #include "test_helpers.h" #define TEST_USER_DATA (void *)(0xC0FFEE) @@ -15,8 +16,34 @@ struct test_entry { char padding[TEST_ENTRY_PADDING]; }; -int -main(int argc, char *argv[]) +void * +membuf_alloc_thread(void *arg) +{ + struct membuf *mbuf = arg; + return membuf_alloc(mbuf, 1); +} + +void +membuf_test_mt_reuse() +{ + struct membuf *mbuf = membuf_new(NULL); + + os_thread_t th1; + os_thread_create(&th1, NULL, membuf_alloc_thread, mbuf); + void *ptr1; + os_thread_join(&th1, &ptr1); + os_thread_t th2; + os_thread_create(&th2, NULL, membuf_alloc_thread, mbuf); + void *ptr2; + os_thread_join(&th2, &ptr2); + + UT_ASSERTne(ptr1, ptr2); + + membuf_delete(mbuf); +} + +void +membuf_test_st_reuse() { struct membuf *mbuf = membuf_new(TEST_USER_DATA); UT_ASSERTne(mbuf, NULL); @@ -74,6 +101,13 @@ main(int argc, char *argv[]) membuf_delete(mbuf); free(entries); +} + +int +main(int argc, char *argv[]) +{ + membuf_test_st_reuse(); + membuf_test_mt_reuse(); return 0; } diff --git a/tests/memcpy_threads/test_memcpy_threads.cmake b/tests/memcpy_threads/test_memcpy_threads.cmake index 3992903fc6d..5bc254767e8 100644 --- a/tests/memcpy_threads/test_memcpy_threads.cmake +++ b/tests/memcpy_threads/test_memcpy_threads.cmake @@ -1,6 +1,8 @@ # SPDX-License-Identifier: BSD-3-Clause # Copyright 2022, Intel Corporation +# test case for the memcpy operation with the thread data mover + include(${SRC_DIR}/cmake/test_helpers.cmake) setup() diff --git a/tests/memmove_sync/test_memmove_sync.cmake b/tests/memmove_sync/test_memmove_sync.cmake index 6b4fe55cc64..036d4b395ee 
100644 --- a/tests/memmove_sync/test_memmove_sync.cmake +++ b/tests/memmove_sync/test_memmove_sync.cmake @@ -1,6 +1,8 @@ # SPDX-License-Identifier: BSD-3-Clause # Copyright 2022, Intel Corporation +# test case for the memmove operation with the sync data mover + include(${SRC_DIR}/cmake/test_helpers.cmake) setup() diff --git a/tests/memmove_threads/test_memmove_threads.cmake b/tests/memmove_threads/test_memmove_threads.cmake index d4607442951..fb7376b136d 100644 --- a/tests/memmove_threads/test_memmove_threads.cmake +++ b/tests/memmove_threads/test_memmove_threads.cmake @@ -1,6 +1,8 @@ # SPDX-License-Identifier: BSD-3-Clause # Copyright 2022, Intel Corporation +# test case for the memmove operation with the thread data mover + include(${SRC_DIR}/cmake/test_helpers.cmake) setup() diff --git a/tests/memset_sync/test_memset_sync.cmake b/tests/memset_sync/test_memset_sync.cmake index 3e19bee069d..11c4bb3e766 100644 --- a/tests/memset_sync/test_memset_sync.cmake +++ b/tests/memset_sync/test_memset_sync.cmake @@ -1,6 +1,8 @@ # SPDX-License-Identifier: BSD-3-Clause # Copyright 2022, Intel Corporation +# test case for the memset operation with the sync data mover + include(${SRC_DIR}/cmake/test_helpers.cmake) setup() diff --git a/tests/memset_threads/memset_threads.c b/tests/memset_threads/memset_threads.c index aff895e9cee..2c9727e21bb 100644 --- a/tests/memset_threads/memset_threads.c +++ b/tests/memset_threads/memset_threads.c @@ -81,24 +81,24 @@ test_thread_memset_multiple(size_t str_len) if (futures == NULL) UT_FATAL("futures out of memory"); - struct vdm_operation_future memmove_fut_symbol_f_to_a = + struct vdm_operation_future memset_fut_symbol_f_to_a = vdm_memset(vdm, buf_a, symbol_first, str_len / 2, 0); - struct vdm_operation_future memmove_fut_symbol_s_to_a = + struct vdm_operation_future memset_fut_symbol_s_to_a = vdm_memset(vdm, buf_a + str_len / 2, symbol_second, str_len / 2, 0); - struct vdm_operation_future memmove_fut_symbol_s_to_b = + struct 
vdm_operation_future memset_fut_symbol_s_to_b = vdm_memset(vdm, buf_b, symbol_second, str_len / 2, 0); - struct vdm_operation_future memmove_fut_symbol_f_to_b = + struct vdm_operation_future memset_fut_symbol_f_to_b = vdm_memset(vdm, buf_b + str_len / 2, symbol_first, str_len / 2, 0); - futures[0] = FUTURE_AS_RUNNABLE(&memmove_fut_symbol_f_to_a); - futures[1] = FUTURE_AS_RUNNABLE(&memmove_fut_symbol_s_to_a); - futures[2] = FUTURE_AS_RUNNABLE(&memmove_fut_symbol_s_to_b); - futures[3] = FUTURE_AS_RUNNABLE(&memmove_fut_symbol_f_to_b); + futures[0] = FUTURE_AS_RUNNABLE(&memset_fut_symbol_f_to_a); + futures[1] = FUTURE_AS_RUNNABLE(&memset_fut_symbol_s_to_a); + futures[2] = FUTURE_AS_RUNNABLE(&memset_fut_symbol_s_to_b); + futures[3] = FUTURE_AS_RUNNABLE(&memset_fut_symbol_f_to_b); runtime_wait_multiple(r, futures, 4); diff --git a/tests/memset_threads/test_memset_threads.cmake b/tests/memset_threads/test_memset_threads.cmake index 791f7502116..3991b8cd969 100644 --- a/tests/memset_threads/test_memset_threads.cmake +++ b/tests/memset_threads/test_memset_threads.cmake @@ -1,6 +1,8 @@ # SPDX-License-Identifier: BSD-3-Clause # Copyright 2022, Intel Corporation +# test case for the memset operation with the thread data mover + include(${SRC_DIR}/cmake/test_helpers.cmake) setup() diff --git a/tests/runtime_test/runtime_test.c b/tests/runtime_test/runtime_test.c new file mode 100644 index 00000000000..a5bd8481a97 --- /dev/null +++ b/tests/runtime_test/runtime_test.c @@ -0,0 +1,98 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2022, Intel Corporation */ + +#include +#include +#include +#include +#include +#include "util_dml.h" +#include "core/os.h" +#include "test_helpers.h" + +/* + * test_runtime_wait_multiple -- test executing multiple futures + * with different data movers. 
+ */ +int +test_runtime_wait_multiple(size_t str_len) +{ + int ret = 0; + unsigned seed = (unsigned)time(NULL); + fprintf(stdout, "seed: %u\n", seed); + struct runtime *r = runtime_new(); + + struct data_mover_sync *dms = data_mover_sync_new(); + struct data_mover_threads *dmt = data_mover_threads_default(); + struct data_mover_dml *dmd = data_mover_dml_new(DATA_MOVER_DML_AUTO); + + struct vdm *vdm_sync = data_mover_sync_get_vdm(dms); + struct vdm *vdm_threads = data_mover_threads_get_vdm(dmt); + struct vdm *vdm_dml = data_mover_dml_get_vdm(dmd); + + char *buf_a = malloc(str_len); + char *buf_b = malloc(str_len); + + if (!buf_a || !buf_b) + UT_FATAL("buffers out of memory"); + + char symbol_first = (char)os_rand_r(&seed) % 256; + char symbol_second = (char)os_rand_r(&seed) % 256; + + struct future **futures = malloc(sizeof(struct future *) * 4); + if (futures == NULL) + UT_FATAL("futures out of memory"); + + struct vdm_operation_future memset_fut_symbol_f_to_a = + vdm_memset(vdm_threads, buf_a, symbol_first, str_len / 2, 0); + + struct vdm_operation_future memset_fut_symbol_s_to_a = + vdm_memset(vdm_sync, buf_a + str_len / 2, symbol_second, + str_len / 2, 0); + + struct vdm_operation_future memset_fut_symbol_s_to_b = + vdm_memset(vdm_dml, buf_b, symbol_second, str_len / 2, 0); + + struct vdm_operation_future memset_fut_symbol_f_to_b = + vdm_memset(vdm_sync, buf_b + str_len / 2, symbol_first, + str_len / 2, 0); + + futures[0] = FUTURE_AS_RUNNABLE(&memset_fut_symbol_f_to_a); + futures[1] = FUTURE_AS_RUNNABLE(&memset_fut_symbol_s_to_a); + futures[2] = FUTURE_AS_RUNNABLE(&memset_fut_symbol_s_to_b); + futures[3] = FUTURE_AS_RUNNABLE(&memset_fut_symbol_f_to_b); + + runtime_wait_multiple(r, futures, 4); + + for (size_t i = 0; i < str_len / 2; i++) { + UT_ASSERTeq(buf_a[i], symbol_first); + UT_ASSERTeq(buf_b[i], symbol_second); + } + + for (size_t i = str_len / 2; i < str_len; i++) { + UT_ASSERTeq(buf_a[i], symbol_second); + UT_ASSERTeq(buf_b[i], symbol_first); + } + + 
free(buf_a); + free(buf_b); + free(futures); + + runtime_delete(r); + data_mover_sync_delete(dms); + data_mover_threads_delete(dmt); + data_mover_dml_delete(dmd); + + return ret; +} + +int +main(void) +{ + int ret = 0; + size_t size = 512; + + ret = test_runtime_wait_multiple(size); + + return ret; +} diff --git a/tests/runtime_test/test_runtime.cmake b/tests/runtime_test/test_runtime.cmake new file mode 100644 index 00000000000..10370f74b59 --- /dev/null +++ b/tests/runtime_test/test_runtime.cmake @@ -0,0 +1,20 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright 2022, Intel Corporation + +include(${SRC_DIR}/cmake/test_helpers.cmake) + +setup() + +# check for MOVDIR64B instruction +check_movdir64b() + +# inform that some test cases involving 'mvodir64b' instruction will be skipped +if (MOVDIR64B EQUAL 0) + message(STATUS "movdir64b instruction not available, some test cases will be skipped!") +endif() + +execute(0 ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${BUILD}/runtime_test) + +execute_assert_pass(${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${BUILD}/runtime_test) + +cleanup() diff --git a/tests/vdm/test_vdm.cmake b/tests/vdm/test_vdm.cmake index 27c3b1e97f9..a814b0c205e 100644 --- a/tests/vdm/test_vdm.cmake +++ b/tests/vdm/test_vdm.cmake @@ -1,19 +1,14 @@ # SPDX-License-Identifier: BSD-3-Clause # Copyright 2022, Intel Corporation -# an example for the basic test case +# test for the vdm include(${SRC_DIR}/cmake/test_helpers.cmake) setup() -# The expected input can be provided to the execute function, -# which will check if the return code of the binary file matches. execute(0 ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${BUILD}/vdm) -# execute_assert_pass can be used instead of manually checking if -# the return code equals zero, however it cannot be used with a tracer. -# When used with a tracer assert_pass will be skipped. 
execute_assert_pass(${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${BUILD}/vdm) cleanup() diff --git a/tests/vdm_operation_future_poll/test_vdm_operation_future_poll.cmake b/tests/vdm_operation_future_poll/test_vdm_operation_future_poll.cmake index c909a5a0665..47d7da8e4fd 100644 --- a/tests/vdm_operation_future_poll/test_vdm_operation_future_poll.cmake +++ b/tests/vdm_operation_future_poll/test_vdm_operation_future_poll.cmake @@ -1,6 +1,8 @@ # SPDX-License-Identifier: BSD-3-Clause # Copyright 2022, Intel Corporation +# test the future poll operation with various data movers + include(${SRC_DIR}/cmake/test_helpers.cmake) setup() diff --git a/utils/check-commit.sh b/utils/check-commit.sh index 9f8c31a9c9b..02727f7d21d 100755 --- a/utils/check-commit.sh +++ b/utils/check-commit.sh @@ -1,10 +1,10 @@ #!/usr/bin/env bash # SPDX-License-Identifier: BSD-3-Clause -# Copyright 2016-2021, Intel Corporation +# Copyright 2016-2022, Intel Corporation # -# Used to check whether all the commit messages in a pull request -# follow the GIT/RPMA guidelines. +# Used to check whether all commit messages in a pull request +# follow the GIT and/or project's guidelines. # # usage: ./check-commit.sh commit # diff --git a/utils/check-commits.sh b/utils/check-commits.sh index b68ea045510..c9a28b17d88 100755 --- a/utils/check-commits.sh +++ b/utils/check-commits.sh @@ -1,10 +1,10 @@ #!/usr/bin/env bash # SPDX-License-Identifier: BSD-3-Clause -# Copyright 2016-2020, Intel Corporation +# Copyright 2016-2022, Intel Corporation # -# Used to check whether all the commit messages in a pull request -# follow the GIT/RPMA guidelines. +# Used to check whether all commit messages in a pull request +# follow the GIT and/or project's guidelines. 
# # usage: ./check-commits.sh [range] # diff --git a/utils/md2man/default.man b/utils/md2man/default.man index 20b7152805d..04545e7aa23 100644 --- a/utils/md2man/default.man +++ b/utils/md2man/default.man @@ -8,7 +8,7 @@ $endif$ $if(adjusting)$ .ad $adjusting$ $endif$ -.TH "$title$" "$section$" "$date$" "RPMA - $secondary_title$ version $version$" "RPMA Programmer's Manual" +.TH "$title$" "$section$" "$date$" "MINIASYNC - $secondary_title$ version $version$" "MINIASYNC Programmer's Manual" $if(hyphenate)$ .hy $else$ diff --git a/utils/src2mans.sh b/utils/src2mans.sh index 3bd568232d7..84b98a86812 100755 --- a/utils/src2mans.sh +++ b/utils/src2mans.sh @@ -1,6 +1,6 @@ #!/usr/bin/env bash # SPDX-License-Identifier: BSD-3-Clause -# Copyright 2020-2021, Intel Corporation +# Copyright 2020-2022, Intel Corporation # # src2mans -- extract man pages from source files @@ -61,7 +61,7 @@ do MANUALS="$(mktemp)" ERRORS="$(mktemp)" - src2man -r RPMA -v "RPMA Programmer's Manual" $MAN > $MANUALS 2> $ERRORS + src2man -r MINIASYNC -v "MINIASYNC Programmer's Manual" $MAN > $MANUALS 2> $ERRORS # gawk 5.0.1 does not recognize expressions \;|\,|\o as regex operator sed -i -r "/warning: regexp escape sequence \`[\][;,o]' is not a known regexp operator/d" $ERRORS # remove empty lines