From abddc0b48ff21c6277db6e5ad267046735fbdb3e Mon Sep 17 00:00:00 2001 From: Alexander Golovenkin Date: Wed, 16 Jan 2019 05:12:58 -0800 Subject: [PATCH 1/2] XLSX performance --- .../Common/Helper/GlobalFunctionsHelper.php | 30 ++++++- .../FileBasedStrategy.php | 82 +++++++++++-------- 2 files changed, 74 insertions(+), 38 deletions(-) diff --git a/src/Spout/Common/Helper/GlobalFunctionsHelper.php b/src/Spout/Common/Helper/GlobalFunctionsHelper.php index 0b5f6f18..708bd2bc 100644 --- a/src/Spout/Common/Helper/GlobalFunctionsHelper.php +++ b/src/Spout/Common/Helper/GlobalFunctionsHelper.php @@ -23,6 +23,19 @@ public function fopen($fileName, $mode) return fopen($fileName, $mode); } + /** + * Wrapper around global function fread() + * @see fread() + * + * @param resource $handle + * @param int|null $length + * @return string + */ + public function fread($handle, $length = null) + { + return fread($handle, $length); + } + /** * Wrapper around global function fgets() * @see fgets() @@ -67,13 +80,26 @@ public function fflush($handle) * * @param resource $handle * @param int $offset + * @param int $whence * @return int */ - public function fseek($handle, $offset) + public function fseek($handle, $offset, $whence = SEEK_SET) { - return fseek($handle, $offset); + return fseek($handle, $offset, $whence); } + /** + * Wrapper around global function ftell() + * @see fseek() + * + * @param resource $handle + * @return bool|int + */ + public function ftell($handle) + { + return ftell($handle); + } + /** * Wrapper around global function fgetcsv() * @see fgetcsv() diff --git a/src/Spout/Reader/XLSX/Manager/SharedStringsCaching/FileBasedStrategy.php b/src/Spout/Reader/XLSX/Manager/SharedStringsCaching/FileBasedStrategy.php index c8ded706..6c2785b7 100644 --- a/src/Spout/Reader/XLSX/Manager/SharedStringsCaching/FileBasedStrategy.php +++ b/src/Spout/Reader/XLSX/Manager/SharedStringsCaching/FileBasedStrategy.php @@ -17,6 +17,9 @@ class FileBasedStrategy implements CachingStrategyInterface /** Value to use to escape the line feed character ("\n") */ const ESCAPED_LINE_FEED_CHARACTER = '_x000A_'; + /** Index entry size uint32 for offset and uint16 for length */ + const INDEX_ENTRY_SIZE = 6; + /** @var \Box\Spout\Common\Helper\GlobalFunctionsHelper Helper to work with global functions */ protected $globalFunctionsHelper; @@ -33,19 +36,7 @@ class FileBasedStrategy implements CachingStrategyInterface protected $maxNumStringsPerTempFile; /** @var resource Pointer to the last temp file a shared string was written to */ - protected $tempFilePointer; - - /** - * @var string Path of the temporary file whose contents is currently stored in memory - * @see CachingStrategyFactory::MAX_NUM_STRINGS_PER_TEMP_FILE - */ - protected $inMemoryTempFilePath; - - /** - * @var array Contents of the temporary file that was last read - * @see CachingStrategyFactory::MAX_NUM_STRINGS_PER_TEMP_FILE - */ - protected $inMemoryTempFileContents; + protected $tempFilePointers; /** * @param string $tempFolder Temporary folder where the temporary files to store shared strings will be stored @@ -60,9 +51,30 @@ public function __construct($tempFolder, $maxNumStringsPerTempFile, $helperFacto $this->maxNumStringsPerTempFile = $maxNumStringsPerTempFile; $this->globalFunctionsHelper = $helperFactory->createGlobalFunctionsHelper(); - $this->tempFilePointer = null; + $this->tempFilePointers = []; } + /** + * Open file with cache + * + * @param string $tempFilePath filename with shared strings + */ + private function openCache($tempFilePath) { + if (!array_key_exists($tempFilePath, $this->tempFilePointers)) { + // Open index file and seek to end + $index = $this->globalFunctionsHelper->fopen($tempFilePath . '.index', 'c+'); + $this->globalFunctionsHelper->fseek($index, 0, SEEK_END); + + // Open data file and seek to end + $data = $this->globalFunctionsHelper->fopen($tempFilePath, 'c+'); + $this->globalFunctionsHelper->fseek($data, 0, SEEK_END); + + $this->tempFilePointers[$tempFilePath] = [$index, $data]; + } + + return $this->tempFilePointers[$tempFilePath]; + } + /** * Adds the given string to the cache. * @@ -74,18 +86,14 @@ public function addStringForIndex($sharedString, $sharedStringIndex) { $tempFilePath = $this->getSharedStringTempFilePath($sharedStringIndex); - if (!$this->globalFunctionsHelper->file_exists($tempFilePath)) { - if ($this->tempFilePointer) { - $this->globalFunctionsHelper->fclose($this->tempFilePointer); - } - $this->tempFilePointer = $this->globalFunctionsHelper->fopen($tempFilePath, 'w'); - } + list($index, $data) = $this->openCache($tempFilePath); // The shared string retrieval logic expects each cell data to be on one line only // Encoding the line feed character allows to preserve this assumption $lineFeedEncodedSharedString = $this->escapeLineFeed($sharedString); - $this->globalFunctionsHelper->fwrite($this->tempFilePointer, $lineFeedEncodedSharedString . PHP_EOL); + $this->globalFunctionsHelper->fwrite($index, pack('Nn', $this->globalFunctionsHelper->ftell($data), strlen($lineFeedEncodedSharedString) + strlen(PHP_EOL))); + $this->globalFunctionsHelper->fwrite($data, $lineFeedEncodedSharedString . PHP_EOL); } /** @@ -110,9 +118,13 @@ protected function getSharedStringTempFilePath($sharedStringIndex) public function closeCache() { // close pointer to the last temp file that was written - if ($this->tempFilePointer) { - $this->globalFunctionsHelper->fclose($this->tempFilePointer); + if (!empty($this->tempFilePointers)) { + foreach ($this->tempFilePointers as $pointer) { + $this->globalFunctionsHelper->fclose($pointer[0]); + $this->globalFunctionsHelper->fclose($pointer[1]); + } } + $this->tempFilePointers = []; } /** @@ -131,21 +143,19 @@ public function getStringAtIndex($sharedStringIndex) throw new SharedStringNotFoundException("Shared string temp file not found: $tempFilePath ; for index: $sharedStringIndex"); } - if ($this->inMemoryTempFilePath !== $tempFilePath) { - // free memory - unset($this->inMemoryTempFileContents); - - $this->inMemoryTempFileContents = explode(PHP_EOL, $this->globalFunctionsHelper->file_get_contents($tempFilePath)); - $this->inMemoryTempFilePath = $tempFilePath; - } + list($index, $data) = $this->openCache($tempFilePath); - $sharedString = null; + // Read index entry + $this->globalFunctionsHelper->fseek($index, $indexInFile * self::INDEX_ENTRY_SIZE); + $indexEntryBytes = $this->globalFunctionsHelper->fread($index, self::INDEX_ENTRY_SIZE); + $indexEntry = unpack('Noffset/nlen', $indexEntryBytes); - // Using isset here because it is way faster than array_key_exists... - if (isset($this->inMemoryTempFileContents[$indexInFile])) { - $escapedSharedString = $this->inMemoryTempFileContents[$indexInFile]; - $sharedString = $this->unescapeLineFeed($escapedSharedString); - } + $sharedString = null; + if ($indexEntry['offset'] + $indexEntry['len'] <= filesize($tempFilePath)) { + $this->globalFunctionsHelper->fseek($data, $indexEntry['offset']); + $escapedSharedString = $this->globalFunctionsHelper->fread($data, $indexEntry['len']); + $sharedString = $this->unescapeLineFeed($escapedSharedString); + } if ($sharedString === null) { throw new SharedStringNotFoundException("Shared string not found for index: $sharedStringIndex"); From 956056511a13683bbbb2531e557d42c0e9bee5ce Mon Sep 17 00:00:00 2001 From: Alexander Golovenkin Date: Wed, 16 Jan 2019 05:49:54 -0800 Subject: [PATCH 2/2] Fix coding style --- .../Common/Helper/GlobalFunctionsHelper.php | 46 ++++++------ .../FileBasedStrategy.php | 71 ++++++++++--------- 2 files changed, 59 insertions(+), 58 deletions(-) diff --git a/src/Spout/Common/Helper/GlobalFunctionsHelper.php b/src/Spout/Common/Helper/GlobalFunctionsHelper.php index 708bd2bc..d567dc7f 100644 --- a/src/Spout/Common/Helper/GlobalFunctionsHelper.php +++ b/src/Spout/Common/Helper/GlobalFunctionsHelper.php @@ -23,18 +23,18 @@ public function fopen($fileName, $mode) return fopen($fileName, $mode); } - /** - * Wrapper around global function fread() - * @see fread() - * - * @param resource $handle - * @param int|null $length - * @return string - */ - public function fread($handle, $length = null) - { - return fread($handle, $length); - } + /** + * Wrapper around global function fread() + * @see fread() + * + * @param resource $handle + * @param int|null $length + * @return string + */ + public function fread($handle, $length = null) + { + return fread($handle, $length); + } /** * Wrapper around global function fgets() @@ -88,17 +88,17 @@ public function fseek($handle, $offset, $whence = SEEK_SET) return fseek($handle, $offset, $whence); } - /** - * Wrapper around global function ftell() - * @see fseek() - * - * @param resource $handle - * @return bool|int - */ - public function ftell($handle) - { - return ftell($handle); - } + /** + * Wrapper around global function ftell() + * @see fseek() + * + * @param resource $handle + * @return bool|int + */ + public function ftell($handle) + { + return ftell($handle); + } /** * Wrapper around global function fgetcsv() diff --git a/src/Spout/Reader/XLSX/Manager/SharedStringsCaching/FileBasedStrategy.php b/src/Spout/Reader/XLSX/Manager/SharedStringsCaching/FileBasedStrategy.php index 6c2785b7..fed7ebe8 100644 --- a/src/Spout/Reader/XLSX/Manager/SharedStringsCaching/FileBasedStrategy.php +++ b/src/Spout/Reader/XLSX/Manager/SharedStringsCaching/FileBasedStrategy.php @@ -17,8 +17,8 @@ class FileBasedStrategy implements CachingStrategyInterface /** Value to use to escape the line feed character ("\n") */ const ESCAPED_LINE_FEED_CHARACTER = '_x000A_'; - /** Index entry size uint32 for offset and uint16 for length */ - const INDEX_ENTRY_SIZE = 6; + /** Index entry size uint32 for offset and uint16 for length */ + const INDEX_ENTRY_SIZE = 6; /** @var \Box\Spout\Common\Helper\GlobalFunctionsHelper Helper to work with global functions */ protected $globalFunctionsHelper; @@ -54,26 +54,27 @@ public function __construct($tempFolder, $maxNumStringsPerTempFile, $helperFacto $this->tempFilePointers = []; } - /** - * Open file with cache - * - * @param string $tempFilePath filename with shared strings - */ - private function openCache($tempFilePath) { - if (!array_key_exists($tempFilePath, $this->tempFilePointers)) { - // Open index file and seek to end - $index = $this->globalFunctionsHelper->fopen($tempFilePath . '.index', 'c+'); - $this->globalFunctionsHelper->fseek($index, 0, SEEK_END); + /** + * Open file with cache + * + * @param string $tempFilePath filename with shared strings + */ + private function openCache($tempFilePath) + { + if (!array_key_exists($tempFilePath, $this->tempFilePointers)) { + // Open index file and seek to end + $index = $this->globalFunctionsHelper->fopen($tempFilePath . '.index', 'c+'); + $this->globalFunctionsHelper->fseek($index, 0, SEEK_END); - // Open data file and seek to end - $data = $this->globalFunctionsHelper->fopen($tempFilePath, 'c+'); - $this->globalFunctionsHelper->fseek($data, 0, SEEK_END); + // Open data file and seek to end + $data = $this->globalFunctionsHelper->fopen($tempFilePath, 'c+'); + $this->globalFunctionsHelper->fseek($data, 0, SEEK_END); - $this->tempFilePointers[$tempFilePath] = [$index, $data]; - } + $this->tempFilePointers[$tempFilePath] = [$index, $data]; + } - return $this->tempFilePointers[$tempFilePath]; - } + return $this->tempFilePointers[$tempFilePath]; + } /** * Adds the given string to the cache. @@ -92,7 +93,7 @@ public function addStringForIndex($sharedString, $sharedStringIndex) // Encoding the line feed character allows to preserve this assumption $lineFeedEncodedSharedString = $this->escapeLineFeed($sharedString); - $this->globalFunctionsHelper->fwrite($index, pack('Nn', $this->globalFunctionsHelper->ftell($data), strlen($lineFeedEncodedSharedString) + strlen(PHP_EOL))); + $this->globalFunctionsHelper->fwrite($index, pack('Nn', $this->globalFunctionsHelper->ftell($data), strlen($lineFeedEncodedSharedString) + strlen(PHP_EOL))); $this->globalFunctionsHelper->fwrite($data, $lineFeedEncodedSharedString . PHP_EOL); } @@ -119,12 +120,12 @@ public function closeCache() { // close pointer to the last temp file that was written if (!empty($this->tempFilePointers)) { - foreach ($this->tempFilePointers as $pointer) { - $this->globalFunctionsHelper->fclose($pointer[0]); - $this->globalFunctionsHelper->fclose($pointer[1]); - } + foreach ($this->tempFilePointers as $pointer) { + $this->globalFunctionsHelper->fclose($pointer[0]); + $this->globalFunctionsHelper->fclose($pointer[1]); + } } - $this->tempFilePointers = []; + $this->tempFilePointers = []; } /** @@ -146,16 +147,16 @@ public function getStringAtIndex($sharedStringIndex) list($index, $data) = $this->openCache($tempFilePath); // Read index entry - $this->globalFunctionsHelper->fseek($index, $indexInFile * self::INDEX_ENTRY_SIZE); - $indexEntryBytes = $this->globalFunctionsHelper->fread($index, self::INDEX_ENTRY_SIZE); - $indexEntry = unpack('Noffset/nlen', $indexEntryBytes); - - $sharedString = null; - if ($indexEntry['offset'] + $indexEntry['len'] <= filesize($tempFilePath)) { - $this->globalFunctionsHelper->fseek($data, $indexEntry['offset']); - $escapedSharedString = $this->globalFunctionsHelper->fread($data, $indexEntry['len']); - $sharedString = $this->unescapeLineFeed($escapedSharedString); - } + $this->globalFunctionsHelper->fseek($index, $indexInFile * self::INDEX_ENTRY_SIZE); + $indexEntryBytes = $this->globalFunctionsHelper->fread($index, self::INDEX_ENTRY_SIZE); + $indexEntry = unpack('Noffset/nlen', $indexEntryBytes); + + $sharedString = null; + if ($indexEntry['offset'] + $indexEntry['len'] <= filesize($tempFilePath)) { + $this->globalFunctionsHelper->fseek($data, $indexEntry['offset']); + $escapedSharedString = $this->globalFunctionsHelper->fread($data, $indexEntry['len']); + $sharedString = $this->unescapeLineFeed($escapedSharedString); + } if ($sharedString === null) { throw new SharedStringNotFoundException("Shared string not found for index: $sharedStringIndex");