diff --git a/src/Spout/Common/Helper/GlobalFunctionsHelper.php b/src/Spout/Common/Helper/GlobalFunctionsHelper.php
index 0b5f6f18..d567dc7f 100644
--- a/src/Spout/Common/Helper/GlobalFunctionsHelper.php
+++ b/src/Spout/Common/Helper/GlobalFunctionsHelper.php
@@ -23,6 +23,19 @@ public function fopen($fileName, $mode)
         return fopen($fileName, $mode);
     }
 
+    /**
+     * Wrapper around global function fread()
+     * @see fread()
+     *
+     * @param resource $handle
+     * @param int|null $length Maximum number of bytes to read; null reads everything up to the end of the stream
+     * @return string|false
+     */
+    public function fread($handle, $length = null)
+    {
+        return ($length === null) ? stream_get_contents($handle) : fread($handle, $length);
+    }
+
     /**
      * Wrapper around global function fgets()
      * @see fgets()
@@ -67,11 +80,24 @@ public function fflush($handle)
      *
      * @param resource $handle
      * @param int $offset
+     * @param int $whence
      * @return int
      */
-    public function fseek($handle, $offset)
+    public function fseek($handle, $offset, $whence = SEEK_SET)
+    {
+        return fseek($handle, $offset, $whence);
+    }
+
+    /**
+     * Wrapper around global function ftell()
+     * @see ftell()
+     *
+     * @param resource $handle
+     * @return int|false
+     */
+    public function ftell($handle)
     {
-        return fseek($handle, $offset);
+        return ftell($handle);
     }
 
     /**
diff --git a/src/Spout/Reader/XLSX/Manager/SharedStringsCaching/FileBasedStrategy.php b/src/Spout/Reader/XLSX/Manager/SharedStringsCaching/FileBasedStrategy.php
index c8ded706..fed7ebe8 100644
--- a/src/Spout/Reader/XLSX/Manager/SharedStringsCaching/FileBasedStrategy.php
+++ b/src/Spout/Reader/XLSX/Manager/SharedStringsCaching/FileBasedStrategy.php
@@ -17,6 +17,9 @@ class FileBasedStrategy implements CachingStrategyInterface
     /** Value to use to escape the line feed character ("\n") */
     const ESCAPED_LINE_FEED_CHARACTER = '_x000A_';
 
+    /** Size in bytes of one index entry: uint32 for offset and uint32 for length */
+    const INDEX_ENTRY_SIZE = 8;
+
     /** @var \Box\Spout\Common\Helper\GlobalFunctionsHelper Helper to work with global functions */
     protected $globalFunctionsHelper;
 
@@ -33,19 +36,7 @@ class FileBasedStrategy implements CachingStrategyInterface
     protected $maxNumStringsPerTempFile;
 
-    /** @var resource Pointer to the last temp file a shared string was written to */
-    protected $tempFilePointer;
-
-    /**
-     * @var string Path of the temporary file whose contents is currently stored in memory
-     * @see CachingStrategyFactory::MAX_NUM_STRINGS_PER_TEMP_FILE
-     */
-    protected $inMemoryTempFilePath;
-
-    /**
-     * @var array Contents of the temporary file that was last read
-     * @see CachingStrategyFactory::MAX_NUM_STRINGS_PER_TEMP_FILE
-     */
-    protected $inMemoryTempFileContents;
+    /** @var array Open file handles as [index handle, data handle] pairs, keyed by temp file path */
+    protected $tempFilePointers;
 
     /**
      * @param string $tempFolder Temporary folder where the temporary files to store shared strings will be stored
@@ -60,7 +51,30 @@ public function __construct($tempFolder, $maxNumStringsPerTempFile, $helperFacto
         $this->maxNumStringsPerTempFile = $maxNumStringsPerTempFile;
 
         $this->globalFunctionsHelper = $helperFactory->createGlobalFunctionsHelper();
-        $this->tempFilePointer = null;
+        $this->tempFilePointers = [];
+    }
+
+    /**
+     * Returns the open index and data file handles for the given temp file, opening them on first use
+     *
+     * @param string $tempFilePath Path of the temp file holding the shared strings
+     * @return resource[] [$index, $data]: handle on "$tempFilePath.index" and handle on "$tempFilePath"
+     */
+    private function openCache($tempFilePath)
+    {
+        if (!array_key_exists($tempFilePath, $this->tempFilePointers)) {
+            // Open index file and seek to end
+            $index = $this->globalFunctionsHelper->fopen($tempFilePath . '.index', 'c+');
+            $this->globalFunctionsHelper->fseek($index, 0, SEEK_END);
+
+            // Open data file and seek to end
+            $data = $this->globalFunctionsHelper->fopen($tempFilePath, 'c+');
+            $this->globalFunctionsHelper->fseek($data, 0, SEEK_END);
+
+            $this->tempFilePointers[$tempFilePath] = [$index, $data];
+        }
+
+        return $this->tempFilePointers[$tempFilePath];
     }
 
     /**
@@ -74,18 +88,15 @@ public function addStringForIndex($sharedString, $sharedStringIndex)
     {
         $tempFilePath = $this->getSharedStringTempFilePath($sharedStringIndex);
 
-        if (!$this->globalFunctionsHelper->file_exists($tempFilePath)) {
-            if ($this->tempFilePointer) {
-                $this->globalFunctionsHelper->fclose($this->tempFilePointer);
-            }
-            $this->tempFilePointer = $this->globalFunctionsHelper->fopen($tempFilePath, 'w');
-        }
+        list($index, $data) = $this->openCache($tempFilePath);
 
         // The shared string retrieval logic expects each cell data to be on one line only
         // Encoding the line feed character allows to preserve this assumption
         $lineFeedEncodedSharedString = $this->escapeLineFeed($sharedString);
 
-        $this->globalFunctionsHelper->fwrite($this->tempFilePointer, $lineFeedEncodedSharedString . PHP_EOL);
+        // Record where the string starts in the data file and how many bytes it spans (line terminator included)
+        $this->globalFunctionsHelper->fwrite($index, pack('NN', $this->globalFunctionsHelper->ftell($data), strlen($lineFeedEncodedSharedString) + strlen(PHP_EOL)));
+        $this->globalFunctionsHelper->fwrite($data, $lineFeedEncodedSharedString . PHP_EOL);
     }
 
     /**
@@ -110,9 +121,13 @@ protected function getSharedStringTempFilePath($sharedStringIndex)
     public function closeCache()
     {
-        // close pointer to the last temp file that was written
-        if ($this->tempFilePointer) {
-            $this->globalFunctionsHelper->fclose($this->tempFilePointer);
+        // close the index and data handles of every temp file opened so far
+        if (!empty($this->tempFilePointers)) {
+            foreach ($this->tempFilePointers as $pointer) {
+                $this->globalFunctionsHelper->fclose($pointer[0]);
+                $this->globalFunctionsHelper->fclose($pointer[1]);
+            }
         }
+        $this->tempFilePointers = [];
     }
 
     /**
@@ -131,18 +146,17 @@ public function getStringAtIndex($sharedStringIndex)
             throw new SharedStringNotFoundException("Shared string temp file not found: $tempFilePath ; for index: $sharedStringIndex");
         }
 
-        if ($this->inMemoryTempFilePath !== $tempFilePath) {
-            // free memory
-            unset($this->inMemoryTempFileContents);
+        list($index, $data) = $this->openCache($tempFilePath);
 
-            $this->inMemoryTempFileContents = explode(PHP_EOL, $this->globalFunctionsHelper->file_get_contents($tempFilePath));
-            $this->inMemoryTempFilePath = $tempFilePath;
-        }
+        // Read index entry
+        $this->globalFunctionsHelper->fseek($index, $indexInFile * self::INDEX_ENTRY_SIZE);
+        $indexEntryBytes = $this->globalFunctionsHelper->fread($index, self::INDEX_ENTRY_SIZE);
+        // A short read means the index has no entry for this string: skip unpack(), which would warn and return false
+        $indexEntry = (strlen((string) $indexEntryBytes) === self::INDEX_ENTRY_SIZE) ? unpack('Noffset/Nlen', $indexEntryBytes) : false;
 
         $sharedString = null;
-
-        // Using isset here because it is way faster than array_key_exists...
-        if (isset($this->inMemoryTempFileContents[$indexInFile])) {
-            $escapedSharedString = $this->inMemoryTempFileContents[$indexInFile];
+        if ($indexEntry !== false && $indexEntry['offset'] + $indexEntry['len'] <= filesize($tempFilePath)) {
+            $this->globalFunctionsHelper->fseek($data, $indexEntry['offset']);
+            $escapedSharedString = $this->globalFunctionsHelper->fread($data, $indexEntry['len']);
             $sharedString = $this->unescapeLineFeed($escapedSharedString);
         }