diff --git a/src/compiler-defines.h b/src/compiler-defines.h
index 0fb7eb00..b2e331db 100644
--- a/src/compiler-defines.h
+++ b/src/compiler-defines.h
@@ -40,4 +40,10 @@
 
 #define PACKED __attribute__((packed))
 
+#if defined(__GNUC__)
+#define FORCE_OPTIMIZE_O3 __attribute__((optimize("-O3")))
+#else
+#define FORCE_OPTIMIZE_O3
+#endif
+
 #endif
diff --git a/src/is-pristine.h b/src/is-pristine.h
new file mode 100644
index 00000000..458d297f
--- /dev/null
+++ b/src/is-pristine.h
@@ -0,0 +1,43 @@
+// Copyright Cartesi and individual authors (see AUTHORS)
+// SPDX-License-Identifier: LGPL-3.0-or-later
+//
+// This program is free software: you can redistribute it and/or modify it under
+// the terms of the GNU Lesser General Public License as published by the Free
+// Software Foundation, either version 3 of the License, or (at your option) any
+// later version.
+//
+// This program is distributed in the hope that it will be useful, but WITHOUT ANY
+// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+// PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public License along
+// with this program (see COPYING). If not, see <https://www.gnu.org/licenses/>.
+//
+
+#ifndef IS_PRISTINE_H
+#define IS_PRISTINE_H
+
+#include "compiler-defines.h"
+#include <stddef.h>
+#include <stdint.h>
+
+namespace cartesi {
+
+/// \brief This is an optimized function for checking if memory page is pristine.
+/// \param data Memory pointer
+/// \param length Memory length
+/// \details It's instead to be used in situations where length is equal or less than a page size.
+// NOLINTNEXTLINE(clang-diagnostic-unknown-attributes)
+static inline bool FORCE_OPTIMIZE_O3 is_pristine(const unsigned char *data, size_t length) {
+    // This tight for loop has no branches, and is optimized to SIMD instructions in x86_64,
+    // making it very fast to check if a given page is pristine.
+    unsigned char bits = 0;
+    for (size_t i = 0; i < length; ++i) {
+        bits |= data[i];
+    }
+    return bits == 0;
+}
+
+} // namespace cartesi
+
+#endif
\ No newline at end of file
diff --git a/src/machine.cpp b/src/machine.cpp
index 28836c88..cb34b4c5 100644
--- a/src/machine.cpp
+++ b/src/machine.cpp
@@ -43,6 +43,7 @@
 #include "htif.h"
 #include "i-device-state-access.h"
 #include "interpret.h"
+#include "is-pristine.h"
 #include "machine-config.h"
 #include "machine-memory-range-descr.h"
 #include "machine-runtime-config.h"
@@ -2031,10 +2032,7 @@ bool machine::update_merkle_tree() const {
                     return false;
                 }
                 if (page_data != nullptr) {
-                    const bool is_pristine = std::all_of(page_data, page_data + PMA_PAGE_SIZE,
-                        [](unsigned char pp) -> bool { return pp == '\0'; });
-
-                    if (is_pristine) {
+                    if (is_pristine(page_data, PMA_PAGE_SIZE)) {
                         // The update_page_node_hash function in the machine_merkle_tree is not thread
                         // safe, so we protect it with a mutex
                         const parallel_for_mutex_guard lock(mutex);
diff --git a/src/pma.cpp b/src/pma.cpp
index cde8d846..45d29a5e 100644
--- a/src/pma.cpp
+++ b/src/pma.cpp
@@ -26,6 +26,7 @@
 #include <system_error>
 #include <tuple>
 
+#include "is-pristine.h"
 #include "os.h"
 #include "pma-constants.h"
 #include "pma-driver.h"
@@ -162,8 +163,24 @@ void pma_entry::write_memory(uint64_t paddr, const unsigned char *data, uint64_t
     if (data == nullptr) {
         throw std::invalid_argument{"invalid data buffer"};
     }
-    memcpy(get_memory().get_host_memory() + (paddr - get_start()), data, size);
-    mark_dirty_pages(paddr, size);
+    // The case of writing a large range chunk is special and optimized for uarch reset
+    if (size > PMA_PAGE_SIZE) {
+        // Copy in chunks of page size, to avoid marking dirty pages unnecessarily
+        for (uint64_t offset = 0; offset < size; offset += PMA_PAGE_SIZE) {
+            const uint64_t paddr_offset = paddr + offset;
+            const uint64_t chunk_len = std::min<uint64_t>(PMA_PAGE_SIZE, size - offset);
+            const unsigned char *src = data + offset;
+            unsigned char *dest = get_memory().get_host_memory() + (paddr_offset - get_start());
+            if (memcmp(dest, src, chunk_len) != 0) {
+                // Page is different, we have to copy memory
+                memcpy(dest, src, chunk_len);
+                mark_dirty_pages(paddr + offset, chunk_len);
+            }
+        }
+    } else {
+        memcpy(get_memory().get_host_memory() + (paddr - get_start()), data, size);
+        mark_dirty_pages(paddr, size);
+    }
 }
 
 void pma_entry::fill_memory(uint64_t paddr, unsigned char value, uint64_t size) {
@@ -173,8 +190,23 @@ void pma_entry::fill_memory(uint64_t paddr, unsigned char value, uint64_t size)
     if (!contains(paddr, size)) {
         throw std::invalid_argument{"range not contained in pma"};
     }
-    memset(get_memory().get_host_memory() + (paddr - get_start()), value, size);
-    mark_dirty_pages(paddr, size);
+    // The case of filling a large range with zeros is special and optimized for uarch reset
+    if (value == 0 && size > PMA_PAGE_SIZE) {
+        // Fill in chunks of page size, to avoid marking dirty pages unnecessarily
+        for (uint64_t offset = 0; offset < size; offset += PMA_PAGE_SIZE) {
+            const uint64_t paddr_offset = paddr + offset;
+            const uint64_t chunk_len = std::min<uint64_t>(PMA_PAGE_SIZE, size - offset);
+            unsigned char *dest = get_memory().get_host_memory() + (paddr_offset - get_start());
+            if (!is_pristine(dest, chunk_len)) {
+                // Page is different, we have to fill memory
+                memset(dest, 0, chunk_len);
+                mark_dirty_pages(paddr + offset, chunk_len);
+            }
+        }
+    } else {
+        memset(get_memory().get_host_memory() + (paddr - get_start()), value, size);
+        mark_dirty_pages(paddr, size);
+    }
 }
 
 bool pma_peek_error(const pma_entry & /*pma*/, const machine & /*m*/, uint64_t /*page_address*/,
diff --git a/src/uarch-record-state-access.h b/src/uarch-record-state-access.h
index 4b664c5f..18122fd9 100644
--- a/src/uarch-record-state-access.h
+++ b/src/uarch-record-state-access.h
@@ -449,8 +449,9 @@ class uarch_record_state_access : public i_uarch_state_access<uarch_record_state
         for (int i = 1; i < UARCH_X_REG_COUNT; i++) {
             m_us.x[i] = UARCH_X_INIT;
         }
-        m_us.ram.fill_memory(m_us.ram.get_start(), 0, m_us.ram.get_length());
         m_us.ram.write_memory(m_us.ram.get_start(), uarch_pristine_ram, uarch_pristine_ram_len);
+        m_us.ram.fill_memory(m_us.ram.get_start() + uarch_pristine_ram_len, 0,
+            m_us.ram.get_length() - uarch_pristine_ram_len);
         if (m_log->get_log_type().has_large_data()) {
             // log written data, if debug info is enabled
             a.get_written().emplace(get_uarch_state_image());
diff --git a/src/uarch-state-access.h b/src/uarch-state-access.h
index 02d8e999..c0578f2e 100644
--- a/src/uarch-state-access.h
+++ b/src/uarch-state-access.h
@@ -195,8 +195,9 @@ class uarch_state_access : public i_uarch_state_access<uarch_state_access> {
         if (uarch_pristine_ram_len > m_us.ram.get_length()) {
             throw std::runtime_error("embedded uarch ram image does not fit in uarch ram pma");
         }
-        m_us.ram.fill_memory(m_us.ram.get_start(), 0, m_us.ram.get_length());
         m_us.ram.write_memory(m_us.ram.get_start(), uarch_pristine_ram, uarch_pristine_ram_len);
+        m_us.ram.fill_memory(m_us.ram.get_start() + uarch_pristine_ram_len, 0,
+            m_us.ram.get_length() - uarch_pristine_ram_len);
     }
 };
 
diff --git a/uarch/Makefile b/uarch/Makefile
index cd80e391..41a8fe61 100644
--- a/uarch/Makefile
+++ b/uarch/Makefile
@@ -43,6 +43,7 @@ WARNFLAGS := -Wall -Wextra -Wpedantic -Wno-array-bounds -Werror
 
 CFLAGS := -march=rv64i -mabi=lp64 -Wl,--gc-sections $(OPTFLAGS) $(UBFLAGS) $(WARNFLAGS) \
 	-DMICROARCHITECTURE=1 \
+	-DNO_COMPUTED_GOTO \
 	-DAVOID_NATIVE_UINT128_T=1 \
 	-ffreestanding \
 	-nostartfiles \