From 4274d7d004f2224ee0ce60148fd7adfede568b69 Mon Sep 17 00:00:00 2001 From: Remus Lazar Date: Wed, 15 May 2019 20:08:54 +0200 Subject: [PATCH 1/3] ES document identifier: use node's identifier instead of path Use the NodeInterface::identifier instead of the path to calculate the elasticsearch document identifier. --- Classes/Indexer/NodeIndexer.php | 35 ++++++++++++++++++++++++++++++--- 1 file changed, 32 insertions(+), 3 deletions(-) diff --git a/Classes/Indexer/NodeIndexer.php b/Classes/Indexer/NodeIndexer.php index b9687d1d..00d242f1 100644 --- a/Classes/Indexer/NodeIndexer.php +++ b/Classes/Indexer/NodeIndexer.php @@ -171,6 +171,35 @@ public function getIndex(): Index return $index; } + /** + * Something like getContextPath() but using the Node Identifier instead of the Path + * + * Result is a string like {nodeIdentifier}@{workspaceName};{dimensionName}={value1},{value2}&{dimensionName}=... (the part starting at ";" is omitted when the context has no dimensions). + * @see NodeInterface::getContextPath() + * + * @param NodeInterface $node + * @return string + */ + protected static function getContextIdentifier(NodeInterface $node): string + { + $contextIdentifier = $node->getIdentifier(); + + $context = $node->getContext(); + + $workspaceName = $context->getWorkspace()->getName(); + $contextIdentifier .= '@' . $workspaceName; + + if ($context->getDimensions() !== []) { + $contextIdentifier .= ';'; + foreach ($context->getDimensions() as $dimensionName => $dimensionValues) { + $contextIdentifier .= $dimensionName . '=' . implode(',', $dimensionValues) . '&'; + } + $contextIdentifier = substr($contextIdentifier, 0, -1); + } + + return $contextIdentifier; + } + /** * Index this node, and add it to the current bulk request. 
* @@ -278,13 +307,13 @@ public function indexNode(NodeInterface $node, $targetWorkspaceName = null) */ protected function calculateDocumentIdentifier(NodeInterface $node, $targetWorkspaceName = null): string { - $contextPath = $node->getContextPath(); + $contextIdentifier = self::getContextIdentifier($node); if ($targetWorkspaceName !== null) { - $contextPath = str_replace($node->getContext()->getWorkspace()->getName(), $targetWorkspaceName, $contextPath); + $contextIdentifier = substr_replace($contextIdentifier, $targetWorkspaceName, strlen($node->getIdentifier()) + 1, strlen($node->getContext()->getWorkspace()->getName())); /* swap only the workspace segment that directly follows "{identifier}@"; a plain str_replace() would also rewrite matches inside the node identifier or dimension values */ } - return sha1($contextPath); + return sha1($contextIdentifier); } /** From 6d599ad0e7d030ff1098188ecb2853a889762631 Mon Sep 17 00:00:00 2001 From: Remus Lazar Date: Mon, 27 May 2019 15:09:08 +0200 Subject: [PATCH 2/3] Fixes a caching issue (node having an old path) in indexNode() This is basically just a workaround, see inline comments for details. --- Classes/Indexer/NodeIndexer.php | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Classes/Indexer/NodeIndexer.php b/Classes/Indexer/NodeIndexer.php index 00d242f1..140923b5 100644 --- a/Classes/Indexer/NodeIndexer.php +++ b/Classes/Indexer/NodeIndexer.php @@ -272,6 +272,12 @@ public function indexNode(NodeInterface $node, $targetWorkspaceName = null) $handleNode = function (NodeInterface $node, Context $context) use ($targetWorkspaceName, $indexer) { $nodeFromContext = $context->getNodeByIdentifier($node->getIdentifier()); if ($nodeFromContext instanceof NodeInterface) { + if ($node->getPath() !== $nodeFromContext->getPath()) { + // If the node from context does have a different path, purge the context cache and re-fetch + // TODO: find the root cause for this bug and fix the node cache invalidation logic. 
+ $context->getFirstLevelNodeCache()->flush(); + $nodeFromContext = $context->getNodeByIdentifier($node->getIdentifier()); + } $indexer($nodeFromContext, $targetWorkspaceName); } else { $documentIdentifier = $this->calculateDocumentIdentifier($node, $targetWorkspaceName); From 040f9fd8da1a3569b538e732315d18d1918ce388 Mon Sep 17 00:00:00 2001 From: Remus Lazar Date: Fri, 31 May 2019 13:52:58 +0200 Subject: [PATCH 3/3] Use the correct documentIdentifier logic for fulltext indexing This refactors the calculateDocumentIdentifier() method and makes it publicly available so we can use the same logic in the IndexerDriver for fulltext indexing. --- Classes/Driver/Version5/IndexerDriver.php | 9 +++------ Classes/Indexer/NodeIndexer.php | 8 ++++---- 2 files changed, 7 insertions(+), 10 deletions(-) diff --git a/Classes/Driver/Version5/IndexerDriver.php b/Classes/Driver/Version5/IndexerDriver.php index 711a305e..bbf4e790 100644 --- a/Classes/Driver/Version5/IndexerDriver.php +++ b/Classes/Driver/Version5/IndexerDriver.php @@ -16,6 +16,7 @@ use Flowpack\ElasticSearch\ContentRepositoryAdaptor\Driver\AbstractIndexerDriver; use Flowpack\ElasticSearch\ContentRepositoryAdaptor\Driver\IndexerDriverInterface; +use Flowpack\ElasticSearch\ContentRepositoryAdaptor\Indexer\NodeIndexer; use Flowpack\ElasticSearch\Domain\Model\Document as ElasticSearchDocument; use Neos\ContentRepository\Domain\Model\NodeInterface; use Neos\Flow\Annotations as Flow; @@ -88,11 +89,7 @@ public function fulltext(NodeInterface $node, array $fulltextIndexOfNode, string return []; } - $closestFulltextNodeContextPath = $closestFulltextNode->getContextPath(); - if ($targetWorkspaceName !== null) { - $closestFulltextNodeContextPath = str_replace($node->getContext()->getWorkspace()->getName(), $targetWorkspaceName, $closestFulltextNodeContextPath); - } - $closestFulltextNodeDocumentIdentifier = 
NodeIndexer::calculateDocumentIdentifier($closestFulltextNode, $targetWorkspaceName); /* pass the target workspace through so the fulltext upsert addresses the same document ID that NodeIndexer uses for the target workspace */ if ($closestFulltextNode->isRemoved()) { // fulltext root is removed, abort silently... @@ -101,7 +98,7 @@ public function fulltext(NodeInterface $node, array $fulltextIndexOfNode, string return []; } - $this->logger->log(sprintf('NodeIndexer (%s): Updated fulltext index for %s (%s)', $closestFulltextNodeDocumentIdentifier, $closestFulltextNodeContextPath, $closestFulltextNode->getIdentifier()), LOG_DEBUG, null, 'ElasticSearch (CR)'); + $this->logger->log(sprintf('NodeIndexer (%s): Updated fulltext index for %s (%s)', $closestFulltextNodeDocumentIdentifier, $closestFulltextNode->getPath(), $closestFulltextNode->getIdentifier()), LOG_DEBUG, null, 'ElasticSearch (CR)'); $upsertFulltextParts = []; if (!empty($fulltextIndexOfNode)) { diff --git a/Classes/Indexer/NodeIndexer.php b/Classes/Indexer/NodeIndexer.php index 140923b5..e220c0df 100644 --- a/Classes/Indexer/NodeIndexer.php +++ b/Classes/Indexer/NodeIndexer.php @@ -229,7 +229,7 @@ public function indexNode(NodeInterface $node, $targetWorkspaceName = null) $contextPath = str_replace($node->getContext()->getWorkspace()->getName(), $targetWorkspaceName, $contextPath); } - $documentIdentifier = $this->calculateDocumentIdentifier($node, $targetWorkspaceName); + $documentIdentifier = self::calculateDocumentIdentifier($node, $targetWorkspaceName); $nodeType = $node->getNodeType(); $mappingType = $this->getIndex()->findType($this->nodeTypeMappingBuilder->convertNodeTypeNameToMappingName($nodeType->getName())); @@ -280,7 +280,7 @@ public function indexNode(NodeInterface $node, $targetWorkspaceName = null) } $indexer($nodeFromContext, $targetWorkspaceName); } else { - $documentIdentifier = $this->calculateDocumentIdentifier($node, $targetWorkspaceName); + $documentIdentifier = self::calculateDocumentIdentifier($node, $targetWorkspaceName); if ($node->isRemoved()) { $this->removeNode($node, $context->getWorkspaceName()); $this->logger->log(sprintf('NodeIndexer 
(%s): Removed node with identifier %s, no longer in workspace %s', $documentIdentifier, $node->getIdentifier(), $context->getWorkspaceName()), LOG_DEBUG, null, 'ElasticSearch (CR)'); @@ -311,7 +311,7 @@ public function indexNode(NodeInterface $node, $targetWorkspaceName = null) * @return string * @throws \Neos\Flow\Persistence\Exception\IllegalObjectTypeException */ - protected function calculateDocumentIdentifier(NodeInterface $node, $targetWorkspaceName = null): string + public static function calculateDocumentIdentifier(NodeInterface $node, $targetWorkspaceName = null): string { $contextIdentifier = self::getContextIdentifier($node); @@ -345,7 +345,7 @@ public function removeNode(NodeInterface $node, string $targetWorkspaceName = nu } } - $documentIdentifier = $this->calculateDocumentIdentifier($node, $targetWorkspaceName); + $documentIdentifier = self::calculateDocumentIdentifier($node, $targetWorkspaceName); $this->currentBulkRequest[] = $this->documentDriver->delete($node, $documentIdentifier); $this->currentBulkRequest[] = $this->indexerDriver->fulltext($node, [], $targetWorkspaceName);