diff --git a/.changelog/170.feature b/.changelog/170.feature
new file mode 100644
index 00000000..2b5b5f94
--- /dev/null
+++ b/.changelog/170.feature
@@ -0,0 +1 @@
+Improved debugging information when FIL_DEBUG is set.
\ No newline at end of file
diff --git a/filprofiler/_filpreload.c b/filprofiler/_filpreload.c
index 3ea40149..dfd964f4 100644
--- a/filprofiler/_filpreload.c
+++ b/filprofiler/_filpreload.c
@@ -384,18 +384,26 @@ SYMBOL_PREFIX(calloc)(size_t nmemb, size_t size) {
 
 __attribute__((visibility("default"))) void *
 SYMBOL_PREFIX(realloc)(void *addr, size_t size) {
+  // We do removal bookkeeping first. Otherwise, as soon as the freeing
+  // happens, another thread may allocate the same address, leading to a race
+  // condition in the bookkeeping metadata.
+  //
+  // If realloc() fails due to lack of memory, this will result in memory
+  // still existing but Fil thinking it's gone. However, at that point Fil
+  // will then exit with an OOM report, so... not the end of the world, and
+  // unlikely in practice.
+  if (should_track_memory() && ((size_t)addr != 0)) {
+    set_will_i_be_reentrant(1);
+    // Sometimes you'll get the same address back, so if we did add first and
+    // then removed, it would remove the entry erroneously.
+    pymemprofile_free_allocation((size_t)addr);
+    set_will_i_be_reentrant(0);
+  }
   uint64_t currently_reentrant = maybe_set_reentrant_linux();
   void *result = REAL_IMPL(realloc)(addr, size);
   maybe_restore_reentrant_linux(currently_reentrant);
   if (should_track_memory()) {
     set_will_i_be_reentrant(1);
-    // Sometimes you'll get same address, so if we did add first and then
-    // removed, it would remove the entry erroneously.
-    //
-    // Sometimes address is 0, in which case this is equivalent to malloc().
-    if ((size_t)addr != 0) {
-      pymemprofile_free_allocation((size_t)addr);
-    }
     add_allocation((size_t)result, size);
     set_will_i_be_reentrant(0);
   }
@@ -416,14 +424,17 @@ SYMBOL_PREFIX(posix_memalign)(void **memptr, size_t alignment, size_t size) {
 }
 
 __attribute__((visibility("default"))) void SYMBOL_PREFIX(free)(void *addr) {
-  uint64_t currently_reentrant = maybe_set_reentrant_linux();
-  REAL_IMPL(free)(addr);
-  maybe_restore_reentrant_linux(currently_reentrant);
+  // We do bookkeeping first. Otherwise, as soon as the free() happens,
+  // another thread may allocate the same address, leading to a race
+  // condition in the bookkeeping metadata.
   if (should_track_memory()) {
     set_will_i_be_reentrant(1);
     pymemprofile_free_allocation((size_t)addr);
     set_will_i_be_reentrant(0);
   }
+  uint64_t currently_reentrant = maybe_set_reentrant_linux();
+  REAL_IMPL(free)(addr);
+  maybe_restore_reentrant_linux(currently_reentrant);
 }
 
 // On Linux this is exposed via --wrap, to get both mmap() and mmap64() without
@@ -469,13 +480,16 @@ __attribute__((visibility("default"))) int SYMBOL_PREFIX(munmap)(
 #endif
   }
 
-  int result = underlying_real_munmap(addr, length);
-  if (result != -1 && should_track_memory()) {
+  if (should_track_memory()) {
     set_will_i_be_reentrant(1);
     pymemprofile_free_anon_mmap((size_t)addr, length);
     set_will_i_be_reentrant(0);
   }
-  return result;
+
+  // If munmap() fails, the above removal is wrong, but that's highly
+  // unlikely, and we want to prevent a threading race condition, so we need
+  // to remove the metadata first.
+  return underlying_real_munmap(addr, length);
 }
 
 // Old glibc that Conda uses defines aligned_alloc() using inline that doesn't
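The three hunks above all apply the same ordering rule: update Fil's bookkeeping before handing memory back to the allocator. Below is a minimal, self-contained sketch of why that ordering matters. It is illustration only, not part of the patch: the table, lock, and wrapper names (tracked_add, tracked_remove, my_malloc, my_free) are invented for the example, and the real interposition in _filpreload.c uses symbol wrapping and reentrancy guards instead.

/* Sketch only -- hypothetical names, not Fil's real API. */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

#define TABLE_SIZE 1024

/* Toy bookkeeping table mapping live addresses to sizes, guarded by a lock. */
static struct { void *addr; size_t size; } table[TABLE_SIZE];
static pthread_mutex_t table_lock = PTHREAD_MUTEX_INITIALIZER;

static void tracked_add(void *addr, size_t size) {
  pthread_mutex_lock(&table_lock);
  for (int i = 0; i < TABLE_SIZE; i++) {
    if (table[i].addr == NULL) {
      table[i].addr = addr;
      table[i].size = size;
      break;
    }
  }
  pthread_mutex_unlock(&table_lock);
}

static void tracked_remove(void *addr) {
  pthread_mutex_lock(&table_lock);
  for (int i = 0; i < TABLE_SIZE; i++) {
    if (table[i].addr == addr) {
      table[i].addr = NULL;
      break;
    }
  }
  pthread_mutex_unlock(&table_lock);
}

/* Wrapped malloc(): allocate, then record the new allocation. */
void *my_malloc(size_t size) {
  void *result = malloc(size);
  if (result != NULL) {
    tracked_add(result, size);
  }
  return result;
}

/* Wrapped free(): bookkeeping FIRST, then the real free(). With the opposite
 * order, another thread could call my_malloc() between free(addr) and
 * tracked_remove(addr), receive the just-freed address from the allocator,
 * and record it -- and our late tracked_remove() would then erase that
 * thread's brand-new entry. */
void my_free(void *addr) {
  tracked_remove(addr);
  free(addr);
}

int main(void) {
  void *p = my_malloc(64);
  printf("tracked %p\n", p);
  my_free(p);
  return 0;
}

The same reasoning carries over to realloc() and munmap() in the patch; the accepted trade-off, as the new comments note, is that a failed realloc() or munmap() leaves the table briefly wrong, which is unlikely and benign compared to the cross-thread race.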