From b1d9a57f0277f530b18ff685dca1189b2086b111 Mon Sep 17 00:00:00 2001
From: Piotr Balcer
Date: Mon, 18 Jul 2022 15:51:54 +0200
Subject: [PATCH] obj: ensure zones are reclaimed prior to free

This patch fixes a bug where a free performed prior to any allocations,
combined with reservations, could lead to overlapping allocations -
ultimately causing heap corruption and incorrect statistics.

This problem is caused by the lazy reclamation of the heap's runtime
state. Heap runtime state is rebuilt lazily, whenever it is required to
serve allocation requests. Deallocations (free) simply update persistent
metadata and, in the case of huge allocations, insert the freed chunk
into a container of free chunks. On reclaim, any chunks that are free
on-media but not yet part of the runtime state are inserted into a
freelist.

This would have been fine, but libpmemobj's allocator lets software
reserve a chunk, removing it from the heap runtime state without
updating the persistent on-media layout. This means that software can
deallocate a chunk, reserve that same chunk, allocate something
normally - triggering heap zone reclamation - and then finally publish
(i.e., persistently allocate) the reserved chunk. Because reclamation
still sees the reserved chunk as free on-media, it reinserts it into
the freelist, so the same chunk can end up allocated twice. The new
obj_heap_reopen test exercises this sequence (a condensed user-level
sketch also follows the patch).

This patch fixes the problem by ensuring that the object's zone is
fully processed and reclaimed prior to deallocation.

Reported-by: @jolivier23
---
 src/PMDK.sln                                  |  7 ++
 src/libpmemobj/heap.c                         | 67 +++++++++++--
 src/libpmemobj/heap.h                         |  5 +-
 src/libpmemobj/palloc.c                       |  7 +-
 src/test/Makefile                             |  3 +-
 src/test/obj_heap_reopen/.gitignore           |  1 +
 src/test/obj_heap_reopen/Makefile             | 13 +++
 src/test/obj_heap_reopen/TESTS.py             | 20 ++++
 src/test/obj_heap_reopen/obj_heap_reopen.c    | 62 ++++++++++++
 .../obj_heap_reopen/obj_heap_reopen.vcxproj   | 98 +++++++++++++++++++
 .../obj_heap_reopen.vcxproj.filters           | 26 +++++
 11 files changed, 297 insertions(+), 12 deletions(-)
 create mode 100644 src/test/obj_heap_reopen/.gitignore
 create mode 100644 src/test/obj_heap_reopen/Makefile
 create mode 100755 src/test/obj_heap_reopen/TESTS.py
 create mode 100644 src/test/obj_heap_reopen/obj_heap_reopen.c
 create mode 100644 src/test/obj_heap_reopen/obj_heap_reopen.vcxproj
 create mode 100644 src/test/obj_heap_reopen/obj_heap_reopen.vcxproj.filters

diff --git a/src/PMDK.sln b/src/PMDK.sln
index 75bef0be3a1..fbe1c8b167e 100644
--- a/src/PMDK.sln
+++ b/src/PMDK.sln
@@ -905,6 +905,8 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "obj_list_remove", "test\obj
 EndProject
 Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "obj_defrag", "test\obj_defrag\obj_defrag.vcxproj", "{FF6E5B0C-DC00-4C93-B9C2-63D1E858BA80}"
 EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "obj_heap_reopen", "test\obj_heap_reopen\obj_heap_reopen.vcxproj", "{FF6E5B0C-DC00-4C93-B9C2-63D1E858BA81}"
+EndProject
 Global
 	GlobalSection(SolutionConfigurationPlatforms) = preSolution
 		Debug|x64 = Debug|x64
@@ -2040,6 +2042,10 @@ Global
 		{FF6E5B0C-DC00-4C93-B9C2-63D1E858BA80}.Debug|x64.Build.0 = Debug|x64
 		{FF6E5B0C-DC00-4C93-B9C2-63D1E858BA80}.Release|x64.ActiveCfg = Release|x64
 		{FF6E5B0C-DC00-4C93-B9C2-63D1E858BA80}.Release|x64.Build.0 = Release|x64
+		{FF6E5B0C-DC00-4C93-B9C2-63D1E858BA81}.Debug|x64.ActiveCfg = Debug|x64
+		{FF6E5B0C-DC00-4C93-B9C2-63D1E858BA81}.Debug|x64.Build.0 = Debug|x64
+		{FF6E5B0C-DC00-4C93-B9C2-63D1E858BA81}.Release|x64.ActiveCfg = Release|x64
+		{FF6E5B0C-DC00-4C93-B9C2-63D1E858BA81}.Release|x64.Build.0 = Release|x64
 	EndGlobalSection
 	GlobalSection(SolutionProperties) = preSolution
 		HideSolutionNode = FALSE
@@ -2364,6 +2370,7 @@ Global
 		{FEA09B48-34C2-4963-8A5A-F97BDA136D72} = {B870D8A6-12CD-4DD0-B843-833695C2310A}
 		{FF6E5B0C-DC00-4C93-B9C2-63D1E858BA79} = {63C9B3F8-437D-4AD9-B32D-D04AE38C35B6}
 		{FF6E5B0C-DC00-4C93-B9C2-63D1E858BA80} = {63C9B3F8-437D-4AD9-B32D-D04AE38C35B6}
+		{FF6E5B0C-DC00-4C93-B9C2-63D1E858BA81} = {63C9B3F8-437D-4AD9-B32D-D04AE38C35B6}
 	EndGlobalSection
 	GlobalSection(ExtensibilityGlobals) = postSolution
 		SolutionGuid = {5E690324-2D48-486A-8D3C-DCB520D3F693}
diff --git a/src/libpmemobj/heap.c b/src/libpmemobj/heap.c
index cf81d0310be..48058842add 100644
--- a/src/libpmemobj/heap.c
+++ b/src/libpmemobj/heap.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: BSD-3-Clause
-/* Copyright 2015-2021, Intel Corporation */
+/* Copyright 2015-2022, Intel Corporation */
 
 /*
  * heap.c -- heap implementation
@@ -10,6 +10,8 @@
 #include 
 #include 
 
+#include "bucket.h"
+#include "heap_layout.h"
 #include "libpmemobj/ctl.h"
 #include "queue.h"
 #include "heap.h"
@@ -90,7 +92,7 @@ struct heap_rt {
 	unsigned nlocks;
 
 	unsigned nzones;
-	unsigned zones_exhausted;
+	int *zone_reclaimed_map;
 };
 
 /*
@@ -739,6 +741,41 @@ heap_reclaim_zone_garbage(struct palloc_heap *heap, struct bucket *bucket,
 	}
 }
 
+/*
+ * heap_ensure_zone_reclaimed -- make sure that the specified zone has been
+ * already reclaimed.
+ */
+void
+heap_ensure_zone_reclaimed(struct palloc_heap *heap, uint32_t zone_id)
+{
+	int zone_reclaimed;
+	util_atomic_load_explicit32(&heap->rt->zone_reclaimed_map[zone_id],
+		&zone_reclaimed, memory_order_acquire);
+	if (zone_reclaimed)
+		return;
+
+	struct bucket *defb = heap_bucket_acquire(heap,
+		DEFAULT_ALLOC_CLASS_ID,
+		HEAP_ARENA_PER_THREAD);
+
+	struct zone *z = ZID_TO_ZONE(heap->layout, zone_id);
+	ASSERTeq(z->header.magic, ZONE_HEADER_MAGIC);
+
+	/* check a second time just to make sure no other thread was first */
+	util_atomic_load_explicit32(&heap->rt->zone_reclaimed_map[zone_id],
+		&zone_reclaimed, memory_order_acquire);
+	if (zone_reclaimed)
+		goto out;
+
+	util_atomic_store_explicit32(&heap->rt->zone_reclaimed_map[zone_id], 1,
+		memory_order_release);
+
+	heap_reclaim_zone_garbage(heap, defb, zone_id);
+
+out:
+	heap_bucket_release(heap, defb);
+}
+
 /*
  * heap_populate_bucket -- (internal) creates volatile state of memory blocks
  */
@@ -747,11 +784,19 @@ heap_populate_bucket(struct palloc_heap *heap, struct bucket *bucket)
 {
 	struct heap_rt *h = heap->rt;
 
+	unsigned zone_id; /* first not reclaimed zone */
+	for (zone_id = 0; zone_id < h->nzones; ++zone_id) {
+		if (h->zone_reclaimed_map[zone_id] == 0)
+			break;
+	}
+
 	/* at this point we are sure that there's no more memory in the heap */
-	if (h->zones_exhausted == h->nzones)
+	if (zone_id == h->nzones)
 		return ENOMEM;
 
-	uint32_t zone_id = h->zones_exhausted++;
+	util_atomic_store_explicit32(&heap->rt->zone_reclaimed_map[zone_id], 1,
+		memory_order_release);
+
 	struct zone *z = ZID_TO_ZONE(heap->layout, zone_id);
 
 	/* ignore zone and chunk headers */
@@ -1576,6 +1621,13 @@ heap_boot(struct palloc_heap *heap, void *heap_start, uint64_t heap_size,
 		goto error_heap_malloc;
 	}
+	h->nzones = heap_max_zone(heap_size);
+	h->zone_reclaimed_map = Zalloc(sizeof(int) * h->nzones);
+	if (h->zone_reclaimed_map == NULL) {
+		err = ENOMEM;
+		goto err_reclaimed_map_malloc;
+	}
+
 	if ((err = arena_thread_assignment_init(&h->arenas.assignment,
 		Default_arenas_assignment_type)) != 0) {
 		goto error_assignment_init;
 	}
@@ -1594,10 +1646,6 @@ heap_boot(struct palloc_heap *heap, void *heap_start, uint64_t heap_size,
 		goto error_arenas_malloc;
 	}
 
-	h->nzones = heap_max_zone(heap_size);
-
-	h->zones_exhausted = 0;
-
 	h->nlocks = On_valgrind ? MAX_RUN_LOCKS_VG : MAX_RUN_LOCKS;
 	for (unsigned i = 0; i < h->nlocks; ++i)
 		util_mutex_init(&h->run_locks[i]);
@@ -1634,6 +1682,8 @@ heap_boot(struct palloc_heap *heap, void *heap_start, uint64_t heap_size,
 error_alloc_classes_new:
 	arena_thread_assignment_fini(&h->arenas.assignment);
 error_assignment_init:
+	Free(h->zone_reclaimed_map);
+err_reclaimed_map_malloc:
 	Free(h);
 	heap->rt = NULL;
 error_heap_malloc:
@@ -1729,6 +1779,7 @@ heap_cleanup(struct palloc_heap *heap)
 
 	VALGRIND_DO_DESTROY_MEMPOOL(heap->layout);
 
+	Free(rt->zone_reclaimed_map);
 	Free(rt);
 	heap->rt = NULL;
 }
diff --git a/src/libpmemobj/heap.h b/src/libpmemobj/heap.h
index 04dd2cea08f..b173eadce9c 100644
--- a/src/libpmemobj/heap.h
+++ b/src/libpmemobj/heap.h
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: BSD-3-Clause */
-/* Copyright 2015-2021, Intel Corporation */
+/* Copyright 2015-2022, Intel Corporation */
 
 /*
  * heap.h -- internal definitions for heap
@@ -73,6 +73,9 @@ heap_discard_run(struct palloc_heap *heap, struct memory_block *m);
 void
 heap_memblock_on_free(struct palloc_heap *heap, const struct memory_block *m);
 
+void
+heap_ensure_zone_reclaimed(struct palloc_heap *heap, uint32_t zone_id);
+
 int
 heap_free_chunk_reuse(struct palloc_heap *heap, struct bucket *bucket,
 	struct memory_block *m);
diff --git a/src/libpmemobj/palloc.c b/src/libpmemobj/palloc.c
index 856aba6be00..0dfdd789986 100644
--- a/src/libpmemobj/palloc.c
+++ b/src/libpmemobj/palloc.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: BSD-3-Clause
-/* Copyright 2015-2020, Intel Corporation */
+/* Copyright 2015-2022, Intel Corporation */
 
 /*
  * palloc.c -- implementation of pmalloc POSIX-like API
@@ -263,7 +263,8 @@ palloc_heap_action_exec(struct palloc_heap *heap,
 	struct operation_context *ctx)
 {
 #ifdef DEBUG
-	if (act->m.m_ops->get_state(&act->m) == act->new_state) {
+	enum memblock_state s = act->m.m_ops->get_state(&act->m);
+	if (s == act->new_state || s == MEMBLOCK_STATE_UNKNOWN) {
 		ERR("invalid operation or heap corruption");
 		ASSERT(0);
 	}
@@ -613,6 +614,8 @@ palloc_defer_free_create(struct palloc_heap *heap, uint64_t off,
 	out->offset = off;
 	out->m = memblock_from_offset(heap, off);
 
+	heap_ensure_zone_reclaimed(heap, out->m.zone_id);
+
 	/*
 	 * For the duration of free we may need to protect surrounding
 	 * metadata from being modified.
diff --git a/src/test/Makefile b/src/test/Makefile
index 803f5049b30..62c09d0dacb 100644
--- a/src/test/Makefile
+++ b/src/test/Makefile
@@ -1,5 +1,5 @@
 # SPDX-License-Identifier: BSD-3-Clause
-# Copyright 2014-2021, Intel Corporation
+# Copyright 2014-2022, Intel Corporation
 
 #
 #
@@ -69,6 +69,7 @@ OBJ_TESTS = \
 	obj_fragmentation2\
 	obj_heap\
 	obj_heap_interrupt\
+	obj_heap_reopen\
 	obj_heap_state\
 	obj_include\
 	obj_lane\
diff --git a/src/test/obj_heap_reopen/.gitignore b/src/test/obj_heap_reopen/.gitignore
new file mode 100644
index 00000000000..f90fc047d56
--- /dev/null
+++ b/src/test/obj_heap_reopen/.gitignore
@@ -0,0 +1 @@
+obj_heap_reopen
diff --git a/src/test/obj_heap_reopen/Makefile b/src/test/obj_heap_reopen/Makefile
new file mode 100644
index 00000000000..006a403083c
--- /dev/null
+++ b/src/test/obj_heap_reopen/Makefile
@@ -0,0 +1,13 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright 2022, Intel Corporation
+
+#
+# src/test/obj_heap_reopen/Makefile -- build obj_heap_reopen test
+#
+TARGET = obj_heap_reopen
+OBJS = obj_heap_reopen.o
+
+LIBPMEMOBJ=y
+
+include ../Makefile.inc
+INCS += -I../../libpmemobj
diff --git a/src/test/obj_heap_reopen/TESTS.py b/src/test/obj_heap_reopen/TESTS.py
new file mode 100755
index 00000000000..bcb4445615e
--- /dev/null
+++ b/src/test/obj_heap_reopen/TESTS.py
@@ -0,0 +1,20 @@
+#!../env.py
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright 2022, Intel Corporation
+
+
+import testframework as t
+from testframework import granularity as g
+
+
+class BASIC(t.Test):
+    test_type = t.Medium
+
+    def run(self, ctx):
+        filepath = ctx.create_holey_file(16 * t.MiB, 'testfile1')
+        ctx.exec('obj_heap_reopen', filepath)
+
+
+@g.require_granularity(g.BYTE, g.CACHELINE)
+class TEST0(BASIC):
+    pass
diff --git a/src/test/obj_heap_reopen/obj_heap_reopen.c b/src/test/obj_heap_reopen/obj_heap_reopen.c
new file mode 100644
index 00000000000..1c513501a8c
--- /dev/null
+++ b/src/test/obj_heap_reopen/obj_heap_reopen.c
@@ -0,0 +1,62 @@
+// SPDX-License-Identifier: BSD-3-Clause
+/* Copyright 2022, Intel Corporation */
+
+/*
+ * obj_heap_reopen.c -- test for reopening an existing heap and deallocating
+ * objects prior to any allocations to validate the memory reclamation process.
+ */
+
+#include 
+
+#include "libpmemobj/action_base.h"
+#include "libpmemobj/atomic_base.h"
+#include "out.h"
+#include "unittest.h"
+#include "obj.h"
+
+#define TEST_OBJECT_SIZE (4 << 20)
+
+int
+main(int argc, char *argv[])
+{
+	START(argc, argv, "obj_heap_reopen");
+
+	if (argc < 2)
+		UT_FATAL("usage: %s file-name", argv[0]);
+
+	const char *path = argv[1];
+	PMEMobjpool *pop = NULL;
+
+	if ((pop = pmemobj_create(path, POBJ_LAYOUT_NAME(basic),
+		0, S_IWUSR | S_IRUSR)) == NULL)
+		UT_FATAL("!pmemobj_create: %s", path);
+
+	PMEMoid oid;
+	pmemobj_alloc(pop, &oid, 4 << 20, 0, NULL, NULL);
+
+	pmemobj_close(pop);
+
+	if ((pop = pmemobj_open(path, POBJ_LAYOUT_NAME(basic))) == NULL)
+		UT_FATAL("!pmemobj_open: %s", path);
+
+	uint64_t freed_oid_off = oid.off;
+	pmemobj_free(&oid);
+
+	struct pobj_action act;
+	oid = pmemobj_reserve(pop, &act, TEST_OBJECT_SIZE, 0);
+	UT_ASSERTeq(oid.off, freed_oid_off);
+
+	for (;;) {
+		PMEMoid oid2;
+		if (pmemobj_alloc(pop, &oid2, 1, 0, NULL, NULL) != 0)
+			break;
+		UT_ASSERT(!(oid2.off >= oid.off &&
+			oid2.off <= oid.off + TEST_OBJECT_SIZE));
+	}
+
+	pmemobj_publish(pop, &act, 1);
+
+	pmemobj_close(pop);
+
+	DONE(NULL);
+}
diff --git a/src/test/obj_heap_reopen/obj_heap_reopen.vcxproj b/src/test/obj_heap_reopen/obj_heap_reopen.vcxproj
new file mode 100644
index 00000000000..163353215f0
--- /dev/null
+++ b/src/test/obj_heap_reopen/obj_heap_reopen.vcxproj
@@ -0,0 +1,98 @@
+
+
+
+
+      Debug
+      x64
+
+
+
+      Release
+      x64
+
+
+
+
+    {58386481-30B7-40FC-96AF-0723A4A7B228}
+    Win32Proj
+    obj_heap_reopen
+    10.0.17134.0
+
+
+
+    Application
+    true
+    v140
+
+
+    Application
+    false
+    v140
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+    true
+    $(SolutionDir)libpmemobj;$(IncludePath)
+
+
+    $(SolutionDir)libpmemobj;$(IncludePath)
+
+
+
+      NotUsing
+      Disabled
+
+
+      CompileAsCpp
+
+
+
+
+
+
+
+
+      NotUsing
+      MaxSpeed
+
+
+      stdafx.h
+      CompileAsCpp
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+    {1baa1617-93ae-4196-8a1a-bd492fb18aef}
+
+
+    {9e9e3d25-2139-4a5d-9200-18148ddead45}
+
+
+    {ce3f2dfb-8470-4802-ad37-21caf6cb2681}
+
+
+
+
+
\ No newline at end of file
diff --git a/src/test/obj_heap_reopen/obj_heap_reopen.vcxproj.filters b/src/test/obj_heap_reopen/obj_heap_reopen.vcxproj.filters
new file mode 100644
index 00000000000..e49abc3b0cc
--- /dev/null
+++ b/src/test/obj_heap_reopen/obj_heap_reopen.vcxproj.filters
@@ -0,0 +1,26 @@
+
+
+
+
+      {c0f52631-8f99-42f0-8d94-07ddeba87436}
+      py
+
+
+      {fdf90df9-299e-4772-9987-80b460bd2573}
+
+
+      {7782ecac-f1e4-49cb-b06c-0fc343dd3a97}
+      match
+
+
+
+
+      Source Files
+
+
+
+
+      Test Scripts
+
+
\ No newline at end of file
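
---

The following is a minimal, self-contained sketch of the user-visible sequence described in the commit message, essentially a condensed version of what the new obj_heap_reopen test does. It is illustrative only and not part of the patch; the pool path, layout name, and sizes are assumptions.

/*
 * repro_sketch.c -- hypothetical reproducer for the bug fixed above:
 * free before any alloc, then reserve, alloc (which triggers lazy zone
 * reclamation), and finally publish. Pool path and sizes are assumptions.
 */
#include <stdio.h>
#include <sys/stat.h>
#include <libpmemobj.h>

#define POOL_PATH "/mnt/pmem/repro"	/* assumed pmem-backed path */
#define OBJ_SIZE (4 << 20)		/* a huge (chunk-backed) object */

int
main(void)
{
	/* create a pool, make one huge allocation, then close the pool */
	PMEMobjpool *pop = pmemobj_create(POOL_PATH, "repro",
			PMEMOBJ_MIN_POOL * 2, S_IWUSR | S_IRUSR);
	if (pop == NULL) {
		fprintf(stderr, "pmemobj_create: %s\n", pmemobj_errormsg());
		return 1;
	}
	PMEMoid oid;
	if (pmemobj_alloc(pop, &oid, OBJ_SIZE, 0, NULL, NULL) != 0) {
		fprintf(stderr, "pmemobj_alloc: %s\n", pmemobj_errormsg());
		return 1;
	}
	pmemobj_close(pop);

	/* reopen: the heap runtime state is empty and rebuilt lazily */
	pop = pmemobj_open(POOL_PATH, "repro");
	if (pop == NULL) {
		fprintf(stderr, "pmemobj_open: %s\n", pmemobj_errormsg());
		return 1;
	}

	/* free before any allocation: only persistent metadata is updated */
	pmemobj_free(&oid);

	/* reserve: may hand out the just-freed chunk, volatile state only */
	struct pobj_action act;
	PMEMoid reserved = pmemobj_reserve(pop, &act, OBJ_SIZE, 0);
	(void) reserved;

	/*
	 * A regular allocation forces zone reclamation; before this fix the
	 * reserved chunk still looked free on-media, so it could be handed
	 * out again here, overlapping the reservation.
	 */
	PMEMoid other;
	pmemobj_alloc(pop, &other, 1, 0, NULL, NULL);

	/* publishing persists the reservation, completing the overlap */
	pmemobj_publish(pop, &act, 1);

	pmemobj_close(pop);
	return 0;
}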