From 8c1d4c1c150c20920d204f1af25625c9d414e435 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Christoph=20L=C3=A4ubrich?=
Date: Sun, 10 Nov 2024 18:09:04 +0100
Subject: [PATCH] Implement DOMNode.getTextContent() according to API
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fix https://github.com/eclipse-lemminx/lemminx/issues/1695

Signed-off-by: Christoph Läubrich
---
 org.eclipse.lemminx/pom.xml                   |  2 +-
 .../java/org/eclipse/lemminx/dom/DOMNode.java | 34 ++++++++-
 .../eclipse/lemminx/dom/DOMParserTest.java    | 75 ++++++++++++++++++-
 3 files changed, 106 insertions(+), 5 deletions(-)

diff --git a/org.eclipse.lemminx/pom.xml b/org.eclipse.lemminx/pom.xml
index cb2518148..0f54a23fa 100644
--- a/org.eclipse.lemminx/pom.xml
+++ b/org.eclipse.lemminx/pom.xml
@@ -228,7 +228,7 @@
 		<dependency>
 			<groupId>xml-apis</groupId>
 			<artifactId>xml-apis</artifactId>
-			<version>2.0.2</version>
+			<version>1.4.01</version>
 		</dependency>
 		<dependency>
 			<groupId>com.kotcrab.remark</groupId>
diff --git a/org.eclipse.lemminx/src/main/java/org/eclipse/lemminx/dom/DOMNode.java b/org.eclipse.lemminx/src/main/java/org/eclipse/lemminx/dom/DOMNode.java
index 40aeca47b..4bec436c1 100644
--- a/org.eclipse.lemminx/src/main/java/org/eclipse/lemminx/dom/DOMNode.java
+++ b/org.eclipse.lemminx/src/main/java/org/eclipse/lemminx/dom/DOMNode.java
@@ -827,7 +827,39 @@ public DOMElement getOrphanEndElement(int offset, String tagName, boolean anyOrp
 	 */
 	@Override
 	public String getTextContent() throws DOMException {
-		return getNodeValue();
+
+		switch (getNodeType()) {
+		// Text like nodes simply return their node value
+		case Node.TEXT_NODE:
+		case Node.CDATA_SECTION_NODE:
+		case Node.COMMENT_NODE:
+		case Node.PROCESSING_INSTRUCTION_NODE:
+			return getNodeValue();
+		// These special types has to return null
+		case Node.DOCUMENT_NODE:
+		case Node.DOCUMENT_TYPE_NODE:
+		case Node.NOTATION_NODE:
+			return null;
+		// concatenation of the textContent attribute value of every child node
+		default:
+			if (this.children != null && children.size() > 0) {
+				final StringBuilder builder = new StringBuilder();
+				for (DOMNode child : children) {
+					short nodeType = child.getNodeType();
+					if (nodeType == Node.COMMENT_NODE || nodeType == Node.PROCESSING_INSTRUCTION_NODE) {
+						// excluding COMMENT_NODE and PROCESSING_INSTRUCTION_NODE nodes.
+						continue;
+					}
+					String text = child.getTextContent();
+					if (text != null && !text.isEmpty()) {
+						builder.append(text);
+					}
+				}
+				return builder.toString();
+			}
+			// empty string if the node has no children
+			return "";
+		}
 	}
 
 	@Override
diff --git a/org.eclipse.lemminx/src/test/java/org/eclipse/lemminx/dom/DOMParserTest.java b/org.eclipse.lemminx/src/test/java/org/eclipse/lemminx/dom/DOMParserTest.java
index 497fbc220..495aa4fcd 100644
--- a/org.eclipse.lemminx/src/test/java/org/eclipse/lemminx/dom/DOMParserTest.java
+++ b/org.eclipse.lemminx/src/test/java/org/eclipse/lemminx/dom/DOMParserTest.java
@@ -17,11 +17,19 @@
 import static org.junit.jupiter.api.Assertions.assertNotNull;
 import static org.junit.jupiter.api.Assertions.assertTrue;
 
+import java.io.ByteArrayInputStream;
+import java.nio.charset.StandardCharsets;
 import java.util.List;
+import java.util.function.Function;
+
+import javax.xml.parsers.DocumentBuilder;
+import javax.xml.parsers.DocumentBuilderFactory;
 
 import org.eclipse.lemminx.dom.DOMDocumentType.DocumentTypeKind;
 import org.junit.jupiter.api.Disabled;
 import org.junit.jupiter.api.Test;
+import org.w3c.dom.Document;
+import org.w3c.dom.Node;
 
 /**
  * XML parser tests.
@@ -45,6 +53,66 @@ public void testNestedElement() {
 		assertDocument("", html);
 	}
 
+	@Test
+	public void testGetTextContentWithSimpleContent() throws Exception {
+		assertTextContent("Hello", "Hello", Document::getDocumentElement);
+	}
+
+	@Test
+	public void testGetTextContentWithMixedContent() throws Exception {
+		assertTextContent("Hello", "Hello", Document::getDocumentElement);
+	}
+
+	@Test
+	public void testGetTextContentWithComplexContent() throws Exception {
+		assertTextContent("Hello", "Hello", Document::getDocumentElement);
+	}
+
+	@Test
+	public void testGetTextContentWithCharContent() throws Exception {
+		assertTextContent("Hello", "Hello", Document::getDocumentElement);
+	}
+
+	@Test
+	public void testGetTextContentWithCDATAContent() throws Exception {
+		assertTextContent("", "Hello", Document::getDocumentElement);
+	}
+
+	@Test
+	public void testGetTextContentWithComment() throws Exception {
+		assertTextContent("Hello", "Hello",
+				Document::getDocumentElement);
+	}
+
+	@Test
+	public void testGetTextIsNullForDocument() throws Exception {
+		assertTextContent("Hello", null, d -> d);
+	}
+
+	@Test
+	public void testGetTextContentWithPI() throws Exception {
+		assertTextContent("Hello", "Hello",
+				Document::getDocumentElement);
+	}
+
+	private void assertTextContent(String xml, String expected, Function<Document, Node> nodeExtractor)
+			throws Exception {
+		assertTextContent(DOMParser.getInstance().parse(xml, "uri", null), expected, nodeExtractor);
+		DocumentBuilder builder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
+		assertTextContent(builder.parse(new ByteArrayInputStream(xml.getBytes(StandardCharsets.UTF_8))), expected,
+				nodeExtractor);
+	}
+
+	private void assertTextContent(Document document, String expected, Function<Document, Node> nodeExtractor) {
+		String textContent = nodeExtractor.apply(document).getTextContent();
+		if (expected != null) {
+			assertNotNull(textContent);
+		}
+		assertEquals(expected, textContent);
+	}
+
+
+
 	@Test
 	public void testNestedElements() {
 		DOMNode head = createElement("head", 6, 12, 19, true);
@@ -56,6 +124,7 @@ public void testNestedElements() {
 		assertDocument("", html);
 	}
 
+
 	@Test
 	public void testNestedNestedElements() {
 		DOMNode c = createElement("c", 6, 9, 13, true);
@@ -95,7 +164,7 @@ public void testEmptyTagT() {
 
 	@Test
 	public void singleEndTag() {
-		DOMElement meta = (DOMElement) createElement("meta", 0, 0, 7, false);
+		DOMElement meta = createElement("meta", 0, 0, 7, false);
 		assertDocument("", meta);
 		assertFalse(meta.hasStartTag());
 		assertTrue(meta.hasEndTag());
@@ -104,8 +173,8 @@ public void singleEndTag() {
 
 	@Test
 	public void insideEndTag() {
-		DOMElement meta = (DOMElement) createElement("meta", 6, 6, 13, false);
-		DOMElement html = (DOMElement) createElement("html", 0, 13, 20, true);
+		DOMElement meta = createElement("meta", 6, 6, 13, false);
+		DOMElement html = createElement("html", 0, 13, 20, true);
 		html.addChild(meta);
 
 		assertDocument("", html);