Implement DOMNode.getTextContent() according to API

Fix #1695

Signed-off-by: Christoph Läubrich <[email protected]>
eclipse-lemminx · Nov 16, 2024 · 8c1d4c1 · 8c1d4c1
1 parent 0aadd16
commit 8c1d4c1
Show file tree

Hide file tree

Showing 3 changed files with 106 additions and 5 deletions.
diff --git a/org.eclipse.lemminx/pom.xml b/org.eclipse.lemminx/pom.xml
@@ -228,7 +228,7 @@
 		<dependency>
 			<groupId>xml-apis</groupId>
 			<artifactId>xml-apis</artifactId>
-			<version>2.0.2</version>
+			<version>1.4.01</version>
 		</dependency>
 		<dependency>
 			<groupId>com.kotcrab.remark</groupId>

diff --git a/org.eclipse.lemminx/src/main/java/org/eclipse/lemminx/dom/DOMNode.java b/org.eclipse.lemminx/src/main/java/org/eclipse/lemminx/dom/DOMNode.java
@@ -827,7 +827,39 @@ public DOMElement getOrphanEndElement(int offset, String tagName, boolean anyOrp
 	 */
 	@Override
 	public String getTextContent() throws DOMException {
-		return getNodeValue();
+		switch (getNodeType()) {
+		// Attributes answer with their node value rather than with child content.
+		// NOTE(review): assumes attribute nodes expose no child text nodes - confirm.
+		case Node.ATTRIBUTE_NODE:
+		// Text like nodes simply return their node value
+		case Node.TEXT_NODE:
+		case Node.CDATA_SECTION_NODE:
+		case Node.COMMENT_NODE:
+		case Node.PROCESSING_INSTRUCTION_NODE:
+			return getNodeValue();
+		// These special types have to return null
+		case Node.DOCUMENT_NODE:
+		case Node.DOCUMENT_TYPE_NODE:
+		case Node.NOTATION_NODE:
+			return null;
+		// Any other type: concatenated textContent of its children, skipping comments and PIs
+		default:
+			if (children == null || children.isEmpty()) {
+				return ""; // empty string if the node has no children
+			}
+			final StringBuilder builder = new StringBuilder();
+			for (DOMNode child : children) {
+				short nodeType = child.getNodeType();
+				if (nodeType == Node.COMMENT_NODE || nodeType == Node.PROCESSING_INSTRUCTION_NODE) {
+					continue;
+				}
+				String text = child.getTextContent();
+				if (text != null) { // a child may answer null, which must not be appended as "null"
+					builder.append(text);
+				}
+			}
+			return builder.toString();
+		}
 	}
 
 	@Override

diff --git a/org.eclipse.lemminx/src/test/java/org/eclipse/lemminx/dom/DOMParserTest.java b/org.eclipse.lemminx/src/test/java/org/eclipse/lemminx/dom/DOMParserTest.java
@@ -17,11 +17,19 @@
 import static org.junit.jupiter.api.Assertions.assertNotNull;
 import static org.junit.jupiter.api.Assertions.assertTrue;
 
+import java.io.ByteArrayInputStream;
+import java.nio.charset.StandardCharsets;
 import java.util.List;
+import java.util.function.Function;
+
+import javax.xml.parsers.DocumentBuilder;
+import javax.xml.parsers.DocumentBuilderFactory;
 
 import org.eclipse.lemminx.dom.DOMDocumentType.DocumentTypeKind;
 import org.junit.jupiter.api.Disabled;
 import org.junit.jupiter.api.Test;
+import org.w3c.dom.Document;
+import org.w3c.dom.Node;
 
 /**
  * XML parser tests.
@@ -45,6 +53,66 @@ public void testNestedElement() {
 		assertDocument("<html><body></body></html>", html);
 	}
 
+	/** Text held three elements deep must surface as the root element's text content. */
+	@Test
+	public void testGetTextContentWithSimpleContent() throws Exception {
+		assertTextContent("<a><b><c>Hello</c></b></a>", "Hello", Document::getDocumentElement);
+	}
+
+	/** Text interleaved with nested elements must be concatenated in document order. */
+	@Test
+	public void testGetTextContentWithMixedContent() throws Exception {
+		assertTextContent("<a>H<b>e<c>ll</c></b>o</a>", "Hello", Document::getDocumentElement);
+	}
+
+	/** Text of sibling elements must be concatenated in order, whatever the element names are. */
+	@Test
+	public void testGetTextContentWithComplexContent() throws Exception {
+		assertTextContent("<a><b>H</b><c>e</c><b>ll</b><x>o</x></a>", "Hello", Document::getDocumentElement);
+	}
+
+	/** An element that holds nothing but character data must report exactly that data. */
+	@Test
+	public void testGetTextContentWithCharContent() throws Exception {
+		assertTextContent("<text>Hello</text>", "Hello", Document::getDocumentElement);
+	}
+
+	/** The content of a nested CDATA section must count as text of the root element. */
+	@Test
+	public void testGetTextContentWithCDATAContent() throws Exception {
+		assertTextContent("<a><b><c><![CDATA[Hello]]></c></b></a>", "Hello", Document::getDocumentElement);
+	}
+
+	/** A comment next to the text must not leak into the root element's text content. */
+	@Test
+	public void testGetTextContentWithComment() throws Exception {
+		assertTextContent("<a><b><c>Hello</c><!-- comments must not be included --></b></a>", "Hello",
+				Document::getDocumentElement);
+	}
+
+	/** The document node itself (identity extractor) is expected to report null, not the text. */
+	@Test
+	public void testGetTextIsNullForDocument() throws Exception {
+		assertTextContent("<a>Hello</a>", null, d -> d);
+	}
+
+	/** A processing instruction next to the text must not leak into the root element's text content. */
+	@Test
+	public void testGetTextContentWithPI() throws Exception {
+		assertTextContent("<a><b><c>Hello</c><?PI must not be included ?></b></a>", "Hello",
+				Document::getDocumentElement);
+	}
+
+	/** Asserts the expected text content twice: on the DOMParser result and on a JAXP DocumentBuilder result. */
+	private void assertTextContent(String xml, String expected, Function<Document, Node> nodeExtractor)
+			throws Exception {
+		// document produced by the parser under test
+		Document parsedByDomParser = DOMParser.getInstance().parse(xml, "uri", null);
+		assertTextContent(parsedByDomParser, expected, nodeExtractor);
+		// same XML through JAXP, so that both implementations are held to one expectation
+		DocumentBuilder jaxpBuilder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
+		byte[] utf8Xml = xml.getBytes(StandardCharsets.UTF_8);
+		Document parsedByJaxp = jaxpBuilder.parse(new ByteArrayInputStream(utf8Xml));
+		assertTextContent(parsedByJaxp, expected, nodeExtractor);
+	}
+
+	/** Picks a node from the document and compares its text content with the expectation (null allowed). */
+	private void assertTextContent(Document document, String expected, Function<Document, Node> nodeExtractor) {
+		Node selected = nodeExtractor.apply(document);
+		String actual = selected.getTextContent();
+		if (expected != null) {
+			// fail on a missing value first, before the values are compared
+			assertNotNull(actual);
+		}
+		assertEquals(expected, actual);
+	}
+
+
+
 	@Test
 	public void testNestedElements() {
 		DOMNode head = createElement("head", 6, 12, 19, true);
@@ -56,6 +124,7 @@ public void testNestedElements() {
 		assertDocument("<html><head></head><body></body></html>", html);
 	}
 
+
 	@Test
 	public void testNestedNestedElements() {
 		DOMNode c = createElement("c", 6, 9, 13, true);
@@ -95,7 +164,7 @@ public void testEmptyTagT() {
 
 	@Test
 	public void singleEndTag() {
-		DOMElement meta = (DOMElement) createElement("meta", 0, 0, 7, false);
+		DOMElement meta = createElement("meta", 0, 0, 7, false);
 		assertDocument("</meta>", meta);
 		assertFalse(meta.hasStartTag());
 		assertTrue(meta.hasEndTag());
@@ -104,8 +173,8 @@ public void singleEndTag() {
 
 	@Test
 	public void insideEndTag() {
-		DOMElement meta = (DOMElement) createElement("meta", 6, 6, 13, false);
-		DOMElement html = (DOMElement) createElement("html", 0, 13, 20, true);
+		DOMElement meta = createElement("meta", 6, 6, 13, false);
+		DOMElement html = createElement("html", 0, 13, 20, true);
 		html.addChild(meta);
 
 		assertDocument("<html></meta></html>", html);