From 40499a919b354a95ecd9d2c94117727efaba892b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20W=C3=B3js?= Date: Tue, 19 Mar 2024 07:38:24 +0100 Subject: [PATCH 1/3] IBX-7987: Added extension point to skip nodes while extracting text --- .../config/settings/fieldtype_services.yaml | 19 ++++++++ .../NodeFilterFactoryInterface.php | 14 ++++++ .../TextExtractor/NodeFilterInterface.php | 22 +++++++++ .../TextExtractor/FullTextExtractor.php | 14 +++++- .../NodeFilter/AggregateFilter.php | 37 +++++++++++++++ .../NodeFilter/NodeFilterFactory.php | 20 ++++++++ .../NodeFilter/NodePathFilter.php | 40 ++++++++++++++++ .../TextExtractor/FullTextExtractorTest.php | 6 ++- .../NodeFilter/AggregateFilterTest.php | 33 +++++++++++++ .../NodeFilter/NodePathFilterTest.php | 47 +++++++++++++++++++ 10 files changed, 250 insertions(+), 2 deletions(-) create mode 100644 src/contracts/RichText/TextExtractor/NodeFilterFactoryInterface.php create mode 100644 src/contracts/RichText/TextExtractor/NodeFilterInterface.php create mode 100644 src/lib/RichText/TextExtractor/NodeFilter/AggregateFilter.php create mode 100644 src/lib/RichText/TextExtractor/NodeFilter/NodeFilterFactory.php create mode 100644 src/lib/RichText/TextExtractor/NodeFilter/NodePathFilter.php create mode 100644 tests/lib/RichText/TextExtractor/NodeFilter/AggregateFilterTest.php create mode 100644 tests/lib/RichText/TextExtractor/NodeFilter/NodePathFilterTest.php diff --git a/src/bundle/Resources/config/settings/fieldtype_services.yaml b/src/bundle/Resources/config/settings/fieldtype_services.yaml index 0ee71f70..c38a5d9c 100644 --- a/src/bundle/Resources/config/settings/fieldtype_services.yaml +++ b/src/bundle/Resources/config/settings/fieldtype_services.yaml @@ -70,3 +70,22 @@ services: Ibexa\FieldTypeRichText\RichText\TextExtractor\FullTextExtractor: ~ + Ibexa\Contracts\FieldTypeRichText\RichText\TextExtractor\NodeFilterInterface: + alias: Ibexa\FieldTypeRichText\RichText\TextExtractor\NodeFilter\AggregateFilter + + Ibexa\Contracts\FieldTypeRichText\RichText\TextExtractor\NodeFilterFactoryInterface: + alias: Ibexa\FieldTypeRichText\RichText\TextExtractor\NodeFilter\NodeFilterFactory + + Ibexa\FieldTypeRichText\RichText\TextExtractor\NodeFilter\NodeFilterFactory: ~ + + Ibexa\FieldTypeRichText\RichText\TextExtractor\NodeFilter\AggregateFilter: + arguments: + $filters: !tagged ibexa.field_type.richtext.text_extractor.node_filter + + ibexa.field_type.richtext.text_extractor.node_filter.template: + class: Ibexa\Contracts\FieldTypeRichText\RichText\TextExtractor\NodeFilterInterface + factory: ['@Ibexa\Contracts\FieldTypeRichText\RichText\TextExtractor\NodeFilterFactoryInterface', 'createPathFilter'] + arguments: ['eztemplate', 'ezconfig'] + tags: + - { name: ibexa.field_type.richtext.text_extractor.node_filter } + diff --git a/src/contracts/RichText/TextExtractor/NodeFilterFactoryInterface.php b/src/contracts/RichText/TextExtractor/NodeFilterFactoryInterface.php new file mode 100644 index 00000000..fae43431 --- /dev/null +++ b/src/contracts/RichText/TextExtractor/NodeFilterFactoryInterface.php @@ -0,0 +1,14 @@ +filter = $filter; + } + public function extractText(DOMDocument $document): string { return null !== $document->documentElement @@ -28,8 +36,12 @@ public function extractText(DOMDocument $document): string private function extractTextFromNode(DOMNode $node): string { - $text = ''; + if ($this->filter->filter($node) === true) { + // Node is excluded + return ''; + } + $text = ''; if ($node->childNodes !== null && $node->childNodes->count() > 0) { foreach ($node->childNodes as $child) { $text .= $this->extractTextFromNode($child); diff --git a/src/lib/RichText/TextExtractor/NodeFilter/AggregateFilter.php b/src/lib/RichText/TextExtractor/NodeFilter/AggregateFilter.php new file mode 100644 index 00000000..8d1a8586 --- /dev/null +++ b/src/lib/RichText/TextExtractor/NodeFilter/AggregateFilter.php @@ -0,0 +1,37 @@ +filters = $filters; + } + + public function filter(DOMNode $node): bool + { + foreach ($this->filters as $filter) { + if ($filter->filter($node)) { + return true; + } + } + + return false; + } +} diff --git a/src/lib/RichText/TextExtractor/NodeFilter/NodeFilterFactory.php b/src/lib/RichText/TextExtractor/NodeFilter/NodeFilterFactory.php new file mode 100644 index 00000000..5d37f580 --- /dev/null +++ b/src/lib/RichText/TextExtractor/NodeFilter/NodeFilterFactory.php @@ -0,0 +1,20 @@ +path = array_reverse($path); + } + + public function filter(DOMNode $node): bool + { + foreach ($this->path as $name) { + if ($node === null || $node->nodeName !== $name) { + return false; + } + + $node = $node->parentNode; + } + + return true; + } +} diff --git a/tests/lib/RichText/TextExtractor/FullTextExtractorTest.php b/tests/lib/RichText/TextExtractor/FullTextExtractorTest.php index afb3c9ab..71c8e03f 100644 --- a/tests/lib/RichText/TextExtractor/FullTextExtractorTest.php +++ b/tests/lib/RichText/TextExtractor/FullTextExtractorTest.php @@ -8,13 +8,17 @@ namespace Ibexa\Tests\FieldTypeRichText\RichText\TextExtractor; +use Ibexa\Contracts\FieldTypeRichText\RichText\TextExtractor\NodeFilterInterface; use Ibexa\FieldTypeRichText\RichText\TextExtractor\FullTextExtractor; final class FullTextExtractorTest extends BaseTest { protected function setUp(): void { - $this->textExtractor = new FullTextExtractor(); + $filter = $this->createMock(NodeFilterInterface::class); + $filter->method('filter')->willReturn(false); + + $this->textExtractor = new FullTextExtractor($filter); } public function providerForTestExtractText(): array diff --git a/tests/lib/RichText/TextExtractor/NodeFilter/AggregateFilterTest.php b/tests/lib/RichText/TextExtractor/NodeFilter/AggregateFilterTest.php new file mode 100644 index 00000000..1d6c4823 --- /dev/null +++ b/tests/lib/RichText/TextExtractor/NodeFilter/AggregateFilterTest.php @@ -0,0 +1,33 @@ +createMock(DOMNode::class); + + $filterA = $this->createMock(NodeFilterInterface::class); + $filterA->expects(self::once())->method('filter')->with($node)->willReturn(false); + $filterB = $this->createMock(NodeFilterInterface::class); + $filterB->expects(self::once())->method('filter')->with($node)->willReturn(true); + $filterC = $this->createMock(NodeFilterInterface::class); + $filterC->expects(self::never())->method('filter'); + + $aggregateFilter = new AggregateFilter([$filterA, $filterB, $filterC]); + + self::assertTrue($aggregateFilter->filter($node)); + } +} diff --git a/tests/lib/RichText/TextExtractor/NodeFilter/NodePathFilterTest.php b/tests/lib/RichText/TextExtractor/NodeFilter/NodePathFilterTest.php new file mode 100644 index 00000000..2a136ba6 --- /dev/null +++ b/tests/lib/RichText/TextExtractor/NodeFilter/NodePathFilterTest.php @@ -0,0 +1,47 @@ +loadXML(''); + + $nodeA = $this->getNode($document, '//a'); + $nodeB = $this->getNode($document, '//b'); + $nodeC = $this->getNode($document, '//c'); + + self::assertFalse((new NodePathFilter('b', 'c'))->filter($nodeB)); + self::assertTrue((new NodePathFilter('b', 'c'))->filter($nodeC)); + self::assertFalse((new NodePathFilter('a', 'b', 'c', 'd'))->filter($nodeA)); + } + + private function getNode(DOMDocument $document, string $expression): DOMNode + { + $xpath = new DOMXPath($document); + + $results = $xpath->query($expression); + if ($results instanceof DOMNodeList) { + /** @var \DOMNode */ + return $results->item(0); + } + + throw new RuntimeException("Expression '$expression' did not return a node."); + } +} From 657097c9be9361ce3db22f98b6d7ec05f49f9271 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20W=C3=B3js?= Date: Sat, 30 Mar 2024 08:00:07 +0100 Subject: [PATCH 2/3] Update tests/lib/RichText/TextExtractor/NodeFilter/NodePathFilterTest.php Co-authored-by: Andrew Longosz --- .../RichText/TextExtractor/NodeFilter/NodePathFilterTest.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/lib/RichText/TextExtractor/NodeFilter/NodePathFilterTest.php b/tests/lib/RichText/TextExtractor/NodeFilter/NodePathFilterTest.php index 2a136ba6..46fe068b 100644 --- a/tests/lib/RichText/TextExtractor/NodeFilter/NodePathFilterTest.php +++ b/tests/lib/RichText/TextExtractor/NodeFilter/NodePathFilterTest.php @@ -42,6 +42,6 @@ private function getNode(DOMDocument $document, string $expression): DOMNode return $results->item(0); } - throw new RuntimeException("Expression '$expression' did not return a node."); + self::fail("Expression '$expression' did not return a node."); } } From 206d41731b739fd35fcca5b54cdf2f70bafe94b7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20W=C3=B3js?= Date: Mon, 22 Apr 2024 10:59:46 +0200 Subject: [PATCH 3/3] fixup! IBX-7987: Added extension point to skip nodes while extracting text --- .../lib/RichText/TextExtractor/NodeFilter/NodePathFilterTest.php | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/lib/RichText/TextExtractor/NodeFilter/NodePathFilterTest.php b/tests/lib/RichText/TextExtractor/NodeFilter/NodePathFilterTest.php index 46fe068b..4ddc63cb 100644 --- a/tests/lib/RichText/TextExtractor/NodeFilter/NodePathFilterTest.php +++ b/tests/lib/RichText/TextExtractor/NodeFilter/NodePathFilterTest.php @@ -14,7 +14,6 @@ use DOMXPath; use Ibexa\FieldTypeRichText\RichText\TextExtractor\NodeFilter\NodePathFilter; use PHPUnit\Framework\TestCase; -use RuntimeException; final class NodePathFilterTest extends TestCase {