diff --git a/extension/data_loader/mman.h b/extension/data_loader/mman.h index 246068986ea..26a9ee08067 100644 --- a/extension/data_loader/mman.h +++ b/extension/data_loader/mman.h @@ -12,6 +12,8 @@ #pragma once #include +#include +#include #ifndef _WIN32 @@ -22,6 +24,25 @@ ET_INLINE size_t get_os_page_size() { return sysconf(_SC_PAGESIZE); } +/** + * Platform-specific file stat function. + */ +ET_INLINE int get_file_stat(int fd, size_t* out_size) { + struct stat st; + int err = ::fstat(fd, &st); + if (err >= 0) { + *out_size = static_cast(st.st_size); + } + return err; +} + +/** + * Platform-specific mmap offset type conversion. + */ +ET_INLINE off_t get_mmap_offset(size_t offset) { + return static_cast(offset); +} + #else #define NOMINMAX @@ -40,4 +61,23 @@ ET_INLINE long get_os_page_size() { return pagesize; } +/** + * Platform-specific file stat function. + */ +ET_INLINE int get_file_stat(int fd, size_t* out_size) { + struct _stat64 st; + int err = ::_fstat64(fd, &st); + if (err >= 0) { + *out_size = static_cast(st.st_size); + } + return err; +} + +/** + * Platform-specific mmap offset type conversion. + */ +ET_INLINE uint64_t get_mmap_offset(size_t offset) { + return static_cast(offset); +} + #endif diff --git a/extension/data_loader/mman_windows.cpp b/extension/data_loader/mman_windows.cpp index 89f9f22f467..9bc655a7827 100644 --- a/extension/data_loader/mman_windows.cpp +++ b/extension/data_loader/mman_windows.cpp @@ -21,7 +21,11 @@ #include #include +#include +#include +#define NOMINMAX #include +#undef NOMINMAX #ifndef STATUS_SECTION_TOO_BIG #define STATUS_SECTION_TOO_BIG 0xC0000040L @@ -129,49 +133,44 @@ static DWORD __map_mmap_prot_file(const int prot) { } // namespace -void* mmap(void* addr, size_t len, int prot, int flags, int fildes, off_t off) { +void* mmap( + void* addr, + size_t len, + int prot, + int flags, + int fildes, + uint64_t off) { HANDLE fm, h; - void* map = MAP_FAILED; -#ifdef _MSC_VER -#pragma warning(push) -#pragma warning(disable : 4293) -#endif - - const DWORD dwFileOffsetLow = (sizeof(off_t) <= sizeof(DWORD)) - ? (DWORD)off - : (DWORD)(off & 0xFFFFFFFFL); - const DWORD dwFileOffsetHigh = (sizeof(off_t) <= sizeof(DWORD)) - ? (DWORD)0 - : (DWORD)((off >> 32) & 0xFFFFFFFFL); - const DWORD protect = __map_mmap_prot_page(prot); - const DWORD desiredAccess = __map_mmap_prot_file(prot); - - const off_t maxSize = off + (off_t)len; - - const DWORD dwMaxSizeLow = (sizeof(off_t) <= sizeof(DWORD)) - ? (DWORD)maxSize - : (DWORD)(maxSize & 0xFFFFFFFFL); - const DWORD dwMaxSizeHigh = (sizeof(off_t) <= sizeof(DWORD)) - ? (DWORD)0 - : (DWORD)((maxSize >> 32) & 0xFFFFFFFFL); - -#ifdef _MSC_VER -#pragma warning(pop) -#endif - errno = 0; if (len == 0 /* Unsupported flag combinations */ || (flags & MAP_FIXED) != 0 - /* Usupported protection combinations */ + /* Unsupported protection combinations */ || prot == PROT_EXEC) { errno = EINVAL; return MAP_FAILED; } + if (off > std::numeric_limits::max() - len) { + errno = EINVAL; + return MAP_FAILED; + } + + const std::uint64_t maxSize = off + static_cast(len); + + const DWORD dwFileOffsetLow = static_cast(off & 0xFFFFFFFFULL); + const DWORD dwFileOffsetHigh = + static_cast((off >> 32) & 0xFFFFFFFFULL); + const DWORD protect = __map_mmap_prot_page(prot); + const DWORD desiredAccess = __map_mmap_prot_file(prot); + + const DWORD dwMaxSizeLow = static_cast(maxSize & 0xFFFFFFFFULL); + const DWORD dwMaxSizeHigh = + static_cast((maxSize >> 32) & 0xFFFFFFFFULL); + h = ((flags & MAP_ANONYMOUS) == 0) ? (HANDLE)_get_osfhandle(fildes) : INVALID_HANDLE_VALUE; diff --git a/extension/data_loader/mman_windows.h b/extension/data_loader/mman_windows.h index 563db5d8b21..36e65bcda3c 100644 --- a/extension/data_loader/mman_windows.h +++ b/extension/data_loader/mman_windows.h @@ -31,6 +31,7 @@ #endif #include +#include #ifdef __cplusplus extern "C" { @@ -56,7 +57,13 @@ extern "C" { #define MS_SYNC 2 #define MS_INVALIDATE 4 -void* mmap(void* addr, size_t len, int prot, int flags, int fildes, off_t off); +void* mmap( + void* addr, + size_t len, + int prot, + int flags, + int fildes, + uint64_t off); int munmap(void* addr, size_t len); int mprotect(void* addr, size_t len, int prot); int msync(void* addr, size_t len, int flags); diff --git a/extension/data_loader/mmap_data_loader.cpp b/extension/data_loader/mmap_data_loader.cpp index 10bd2f35f5e..2271d5a3690 100644 --- a/extension/data_loader/mmap_data_loader.cpp +++ b/extension/data_loader/mmap_data_loader.cpp @@ -9,6 +9,7 @@ #include #include +#include #include #include @@ -94,8 +95,8 @@ Result MmapDataLoader::from( } // Cache the file size. - struct stat st; - int err = ::fstat(fd, &st); + size_t file_size; + int err = get_file_stat(fd, &file_size); if (err < 0) { ET_LOG( Error, @@ -106,7 +107,6 @@ Result MmapDataLoader::from( ::close(fd); return Error::AccessFailed; } - size_t file_size = st.st_size; // Copy the filename so we can print better debug messages if reads fail. const char* file_name_copy = ::strdup(file_name); @@ -167,12 +167,6 @@ Error MmapDataLoader::validate_input(size_t offset, size_t size) const { offset, size, file_size_); - ET_CHECK_OR_RETURN_ERROR( - // Recommended by a lint warning. - offset <= std::numeric_limits::max(), - InvalidArgument, - "Offset %zu too large for off_t", - offset); return Error::Ok; } @@ -207,13 +201,10 @@ Result MmapDataLoader::load( // Map the pages read-only. Use shared mappings so that other processes // can also map the same pages and share the same memory. - void* pages = ::mmap( - nullptr, - map_size, - PROT_READ, - MAP_SHARED, - fd_, - static_cast(range.start)); + const auto map_offset = get_mmap_offset(range.start); + + void* pages = + ::mmap(nullptr, map_size, PROT_READ, MAP_SHARED, fd_, map_offset); ET_CHECK_OR_RETURN_ERROR( pages != MAP_FAILED, AccessFailed, @@ -315,13 +306,10 @@ Error MmapDataLoader::load_into( // Map the pages read-only. MAP_PRIVATE vs. MAP_SHARED doesn't matter since // the data is read-only, but use PRIVATE just to further avoid accidentally // modifying the file. - void* pages = ::mmap( - nullptr, - map_size, - PROT_READ, - MAP_PRIVATE, - fd_, - static_cast(range.start)); + const auto map_offset = get_mmap_offset(range.start); + + void* pages = + ::mmap(nullptr, map_size, PROT_READ, MAP_PRIVATE, fd_, map_offset); ET_CHECK_OR_RETURN_ERROR( pages != MAP_FAILED, AccessFailed, diff --git a/extension/data_loader/test/mmap_data_loader_test.cpp b/extension/data_loader/test/mmap_data_loader_test.cpp index 76b972c46d0..df071fd7474 100644 --- a/extension/data_loader/test/mmap_data_loader_test.cpp +++ b/extension/data_loader/test/mmap_data_loader_test.cpp @@ -9,6 +9,7 @@ #include #include +#include #include @@ -428,4 +429,61 @@ TEST_F(MmapDataLoaderTest, LoadIntoCopiesOffsetCorrectly) { // Verify memory copied correctly. EXPECT_EQ(0, std::memcmp(dst, contents + offset, size)); -} \ No newline at end of file +} + +// Tests that the loader can handle files requiring 64-bit file systems. +// This test verifies that offsets and sizes beyond 32-bit limits are handled +// correctly by creating a sparse file with data at a large offset. +TEST_F(MmapDataLoaderTest, LargeFileOffsetSupport) { +// We run some 32 bit tests on Linux so we need to skip this +// test. +#ifndef _WIN32 + if (sizeof(off_t) <= 8) { + return; + } +#endif + // Create a sparse file with a marker at an offset beyond 2GB (32-bit limit). + // We use 3GB to ensure we're testing 64-bit offset handling. + const size_t large_offset = 3ULL * 1024 * 1024 * 1024; // 3GB + const std::string test_marker = "TEST_MARKER_AT_LARGE_OFFSET"; + + // Use TempFile sparse file API to create a 3GB+ file + TempFile tf(large_offset, test_marker, large_offset + test_marker.size()); + + // Now try to load the data using MmapDataLoader. + Result mdl = MmapDataLoader::from(tf.path().c_str()); + ASSERT_EQ(mdl.error(), Error::Ok) + << "Failed to create MmapDataLoader for large sparse file"; + + // Verify the file size is reported correctly (should be > 3GB). + Result file_size = mdl->size(); + ASSERT_EQ(file_size.error(), Error::Ok); + EXPECT_GT(*file_size, large_offset) + << "File size should be larger than the large offset"; + EXPECT_EQ(*file_size, large_offset + test_marker.size()) + << "File size should match offset + marker size"; + + // Try to load the marker data from the large offset. + Result fb = mdl->load( + large_offset, + test_marker.size(), + DataLoader::SegmentInfo(DataLoader::SegmentInfo::Type::Program)); + ASSERT_EQ(fb.error(), Error::Ok) << "Failed to load data from large offset"; + + EXPECT_EQ(fb->size(), test_marker.size()); + EXPECT_EQ(0, std::memcmp(fb->data(), test_marker.data(), test_marker.size())) + << "Data at large offset does not match expected marker"; + + // Test load_into as well. + std::vector buffer(test_marker.size()); + Error err = mdl->load_into( + large_offset, + test_marker.size(), + DataLoader::SegmentInfo(DataLoader::SegmentInfo::Type::Program), + buffer.data()); + ASSERT_EQ(err, Error::Ok) << "load_into failed for large offset"; + + EXPECT_EQ( + 0, std::memcmp(buffer.data(), test_marker.data(), test_marker.size())) + << "load_into data at large offset does not match expected marker"; +} diff --git a/extension/testing_util/temp_file.h b/extension/testing_util/temp_file.h index 4edaf2135d8..0310a5dbeff 100644 --- a/extension/testing_util/temp_file.h +++ b/extension/testing_util/temp_file.h @@ -12,8 +12,8 @@ #include #include #include +#include -#include // open() #include namespace executorch { @@ -42,6 +42,27 @@ class TempFile { CreateFile(data, size, &path_); } + /** + * Creates a sparse temporary file with a string at a specific offset. + * The file will have the specified total size, but only the data at the + * given offset will be written, creating a sparse file that doesn't + * allocate all the disk space. + * + * Example: + * // Create a 3GB file with "DATA_AT_3GB" at 3GB offset + * size_t offset = 3ULL * 1024 * 1024 * 1024; + * std::string data = "DATA_AT_3GB"; + * TempFile tf(offset, data, offset + data.size()); + * + * @param offset Byte offset where the string should be written + * @param data String to write at the specified offset + * @param file_size Total size of the sparse file (must be >= offset + + * data.size()) + */ + TempFile(size_t offset, const std::string& data, size_t file_size) { + CreateSparseFile(offset, data, file_size, &path_); + } + ~TempFile() { if (!path_.empty()) { std::remove(path_.c_str()); @@ -81,6 +102,58 @@ class TempFile { *out_path = path; } + void CreateSparseFile( + size_t offset, + const std::string& data, + size_t file_size, + std::string* out_path) { + ASSERT_GE(file_size, offset + data.size()) + << "file_size must be >= offset + data.size()"; + + // Find a unique temporary file name. + std::string path; + { + std::array buf; + const char* ret = std::tmpnam(buf.data()); + ASSERT_NE(ret, nullptr) << "Could not generate temp file"; + buf[L_tmpnam - 1] = '\0'; + path = std::string(buf.data()) + "-executorch-testing"; + } + + // Open file in binary mode for writing. + std::ofstream file(path, std::ios::out | std::ios::binary); + ASSERT_TRUE(file.is_open()) + << "open(" << path << ") failed: " << strerror(errno); + + // Seek to the offset. + file.seekp(offset, std::ios::beg); + ASSERT_TRUE(file.good()) << "Failed to seek to offset " << offset; + + // Write the data. + file.write(data.data(), data.size()); + ASSERT_TRUE(file.good()) + << "Failed to write " << data.size() << " bytes at offset " << offset; + + // Ensure file is the specified size by seeking to the end and writing a + // byte, but only if the file needs to be extended beyond the data we just + // wrote. + if (file_size > offset + data.size()) { + file.seekp(file_size - 1, std::ios::beg); + ASSERT_TRUE(file.good()) + << "Failed to seek to file_size - 1: " << file_size - 1; + + // Write a single byte to ensure file has the correct size. + file.write("\0", 1); + ASSERT_TRUE(file.good()) + << "Failed to write final byte at position " << file_size - 1; + } + + file.close(); + ASSERT_TRUE(file.good() || file.eof()) << "Error closing file: " << path; + + *out_path = path; + } + // Not safely copyable/movable. TempFile(const TempFile&) = delete; TempFile& operator=(const TempFile&) = delete;