From e2db6a91dc2ea518a811fc249ff027255f32b194 Mon Sep 17 00:00:00 2001 From: Richard Eckart de Castilho Date: Thu, 29 Feb 2024 20:19:35 +0100 Subject: [PATCH 1/4] GH-4921 Turtle writer does not respect namespaces in IRIs - SimpleValueFactory.createIRI(String, String) now actually properly respects the specified namespace and localname - TurtleWriter tries to use the namespace encoded in the IRI unless the localname contains characters which are not valid in prefixed notation - Added test --- .../eclipse/rdf4j/model/impl/SimpleIRI.java | 18 ++++++++++++++++++ .../rdf4j/model/impl/SimpleValueFactory.java | 2 +- .../eclipse/rdf4j/rio/turtle/TurtleUtil.java | 11 +++++++++++ .../eclipse/rdf4j/rio/turtle/TurtleWriter.java | 15 ++++++++++++--- .../rdf4j/rio/turtle/TurtleWriterTest.java | 16 ++++++++++++++++ 5 files changed, 58 insertions(+), 4 deletions(-) diff --git a/core/model/src/main/java/org/eclipse/rdf4j/model/impl/SimpleIRI.java b/core/model/src/main/java/org/eclipse/rdf4j/model/impl/SimpleIRI.java index 1b6aea8d5bb..7c26e671559 100644 --- a/core/model/src/main/java/org/eclipse/rdf4j/model/impl/SimpleIRI.java +++ b/core/model/src/main/java/org/eclipse/rdf4j/model/impl/SimpleIRI.java @@ -67,10 +67,28 @@ protected SimpleIRI(String iriString) { setIRIString(iriString); } + protected SimpleIRI(String namespace, String localname) { + setIRIString(namespace, localname); + } + /*---------* * Methods * *---------*/ + protected void setIRIString(String namespace, String localname) { + Objects.requireNonNull(namespace, "namespace must not be null"); + Objects.requireNonNull(localname, "localname must not be null"); + + String joinedIriString = namespace + localname; + + if (joinedIriString.indexOf(':') < 0) { + throw new IllegalArgumentException("Not a valid (absolute) IRI: " + joinedIriString); + } + + this.iriString = joinedIriString; + this.localNameIdx = namespace.length(); + } + protected void setIRIString(String iriString) { Objects.requireNonNull(iriString, 
"iriString must not be null"); diff --git a/core/model/src/main/java/org/eclipse/rdf4j/model/impl/SimpleValueFactory.java b/core/model/src/main/java/org/eclipse/rdf4j/model/impl/SimpleValueFactory.java index 7492ebbd479..f00adaf1880 100644 --- a/core/model/src/main/java/org/eclipse/rdf4j/model/impl/SimpleValueFactory.java +++ b/core/model/src/main/java/org/eclipse/rdf4j/model/impl/SimpleValueFactory.java @@ -85,7 +85,7 @@ public IRI createIRI(String iri) { @Override public IRI createIRI(String namespace, String localName) { - return createIRI(namespace + localName); + return new SimpleIRI(namespace, localName); } @Override diff --git a/core/rio/turtle/src/main/java/org/eclipse/rdf4j/rio/turtle/TurtleUtil.java b/core/rio/turtle/src/main/java/org/eclipse/rdf4j/rio/turtle/TurtleUtil.java index a6165a8cd36..cedebf4f226 100644 --- a/core/rio/turtle/src/main/java/org/eclipse/rdf4j/rio/turtle/TurtleUtil.java +++ b/core/rio/turtle/src/main/java/org/eclipse/rdf4j/rio/turtle/TurtleUtil.java @@ -459,6 +459,17 @@ public static String encodeURIString(String s) { return s; } + public static boolean isValidPrefixedName(String s) { + final int numberOfCodePoints = s.codePointCount(0, s.length()); + for (int i = 1; i < numberOfCodePoints; i++) { + final int codePoint = s.codePointAt(i); + if (!isPN_CHARS(codePoint)) { + return false; + } + } + return true; + } + /** * Decodes an encoded Turtle string. Any \-escape sequences are substituted with their decoded value. 
* diff --git a/core/rio/turtle/src/main/java/org/eclipse/rdf4j/rio/turtle/TurtleWriter.java b/core/rio/turtle/src/main/java/org/eclipse/rdf4j/rio/turtle/TurtleWriter.java index 6645b7e72d2..985cc0920b2 100644 --- a/core/rio/turtle/src/main/java/org/eclipse/rdf4j/rio/turtle/TurtleWriter.java +++ b/core/rio/turtle/src/main/java/org/eclipse/rdf4j/rio/turtle/TurtleWriter.java @@ -537,11 +537,20 @@ protected void writeResource(Resource res, boolean canShorten) throws IOExceptio } protected void writeURI(IRI uri) throws IOException { - String uriString = uri.toString(); - - // Try to find a prefix for the URI's namespace String prefix = null; + if (TurtleUtil.isValidPrefixedName(uri.getLocalName())) { + prefix = namespaceTable.get(uri.getNamespace()); + if (prefix != null) { + // Namespace is mapped to a prefix; write abbreviated URI + writer.write(prefix); + writer.write(":"); + writer.write(uri.getLocalName()); + return; + } + } + // Try to find a prefix for the URI's namespace + String uriString = uri.toString(); int splitIdx = TurtleUtil.findURISplitIndex(uriString); if (splitIdx > 0) { String namespace = uriString.substring(0, splitIdx); diff --git a/core/rio/turtle/src/test/java/org/eclipse/rdf4j/rio/turtle/TurtleWriterTest.java b/core/rio/turtle/src/test/java/org/eclipse/rdf4j/rio/turtle/TurtleWriterTest.java index 17364444068..255776e4a4e 100644 --- a/core/rio/turtle/src/test/java/org/eclipse/rdf4j/rio/turtle/TurtleWriterTest.java +++ b/core/rio/turtle/src/test/java/org/eclipse/rdf4j/rio/turtle/TurtleWriterTest.java @@ -10,6 +10,7 @@ *******************************************************************************/ package org.eclipse.rdf4j.rio.turtle; +import static org.assertj.core.api.Assertions.assertThat; import static org.junit.jupiter.api.Assertions.assertTrue; import java.io.StringReader; @@ -17,6 +18,7 @@ import org.eclipse.rdf4j.model.IRI; import org.eclipse.rdf4j.model.Model; +import org.eclipse.rdf4j.model.impl.DynamicModelFactory; import 
org.eclipse.rdf4j.model.util.Models; import org.eclipse.rdf4j.rio.RDFFormat; import org.eclipse.rdf4j.rio.Rio; @@ -672,6 +674,20 @@ public void testBlankNodeInlining_indirectCircularReferenceWithIRI() throws Exce assertTrue(Models.isomorphic(expected, actual)); } + @Test + public void testIriNamespace() throws Exception { + Model model = new DynamicModelFactory().createEmptyModel(); + String prefix = "foo-bar"; + String ns = "foo:this.is.my.bar."; + model.setNamespace(prefix, ns); + model.add(vf.createIRI(ns, "lala"), vf.createIRI(ns, "lulu"), vf.createIRI(ns, "lolo")); + + StringWriter stringWriter = new StringWriter(); + Rio.write(model, stringWriter, RDFFormat.TURTLE); + + assertThat(stringWriter.toString()).contains("foo-bar:lala foo-bar:lulu foo-bar:lolo ."); + } + @Test public void testIgnoreAbbreviateNumbers() throws Exception { StringWriter sw = new StringWriter(); From 1c36f7679dee9269e2c4dda4ae98726504b85540 Mon Sep 17 00:00:00 2001 From: Richard Eckart de Castilho Date: Wed, 6 Mar 2024 20:13:35 +0100 Subject: [PATCH 2/4] GH-4921 Turtle writer does not respect namespaces in IRIs - Added tests to check to which degree parsing a Turtle file with unconventional prefixes and serializing it back again preserves the format --- .../eclipse/rdf4j/rio/turtle/TurtleUtil.java | 17 ++++--- .../rdf4j/rio/turtle/TurtleWriterTest.java | 50 ++++++++++++++++++- 2 files changed, 59 insertions(+), 8 deletions(-) diff --git a/core/rio/turtle/src/main/java/org/eclipse/rdf4j/rio/turtle/TurtleUtil.java b/core/rio/turtle/src/main/java/org/eclipse/rdf4j/rio/turtle/TurtleUtil.java index cedebf4f226..cafcf9f2d45 100644 --- a/core/rio/turtle/src/main/java/org/eclipse/rdf4j/rio/turtle/TurtleUtil.java +++ b/core/rio/turtle/src/main/java/org/eclipse/rdf4j/rio/turtle/TurtleUtil.java @@ -460,14 +460,17 @@ public static String encodeURIString(String s) { } public static boolean isValidPrefixedName(String s) { - final int numberOfCodePoints = s.codePointCount(0, s.length()); - for (int 
i = 1; i < numberOfCodePoints; i++) { - final int codePoint = s.codePointAt(i); - if (!isPN_CHARS(codePoint)) { - return false; - } + if (s == null || s.isEmpty()) { + return false; } - return true; + + if (!isPN_CHARS_BASE(s.codePointAt(0))) { + return false; + } + + return s.codePoints() // + .skip(1) // Skip the first code point + .allMatch(TurtleUtil::isPN_CHARS); } /** diff --git a/core/rio/turtle/src/test/java/org/eclipse/rdf4j/rio/turtle/TurtleWriterTest.java b/core/rio/turtle/src/test/java/org/eclipse/rdf4j/rio/turtle/TurtleWriterTest.java index 255776e4a4e..2aa752dcd5e 100644 --- a/core/rio/turtle/src/test/java/org/eclipse/rdf4j/rio/turtle/TurtleWriterTest.java +++ b/core/rio/turtle/src/test/java/org/eclipse/rdf4j/rio/turtle/TurtleWriterTest.java @@ -20,6 +20,7 @@ import org.eclipse.rdf4j.model.Model; import org.eclipse.rdf4j.model.impl.DynamicModelFactory; import org.eclipse.rdf4j.model.util.Models; +import org.eclipse.rdf4j.model.vocabulary.RDFS; import org.eclipse.rdf4j.rio.RDFFormat; import org.eclipse.rdf4j.rio.Rio; import org.eclipse.rdf4j.rio.WriterConfig; @@ -97,7 +98,6 @@ public void testBlankNodeInlining1() throws Exception { Model actual = Rio.parse(new StringReader(stringWriter.toString()), "", RDFFormat.TURTLE); assertTrue(Models.isomorphic(expected, actual)); - } @Test @@ -194,6 +194,54 @@ public void testNoBuffering() throws Exception { assertTrue(Models.isomorphic(expected, actual)); } + @Test + public void testUnusualIrisAndPrefixesParseWriteCompare() throws Exception { + String data = "@prefix server-news: .\n" + + "@prefix rdfs: .\n" + + "server-news:unix rdfs:label \"News on Unix\" .\n" + + "server-news:windows rdfs:label \"News on Windows\" .\n"; + + var expected = Rio.parse(new StringReader(data), "", RDFFormat.TURTLE); + + var stringWriter = new StringWriter(); + var config = new WriterConfig(); + config.set(BasicWriterSettings.INLINE_BLANK_NODES, false); + config.set(BasicWriterSettings.PRETTY_PRINT, false); + Rio.write(expected, 
stringWriter, RDFFormat.TURTLE, config); + + var actual = Rio.parse(new StringReader(stringWriter.toString()), "", RDFFormat.TURTLE); + assertThat(Models.isomorphic(expected, actual)).as("isomorphic").isTrue(); + + assertThat(stringWriter.toString()).isEqualTo(data); + } + + @Test + public void testUnusualIrisAndPrefixesWriteParserWriteCompare() throws Exception { + var prefix = "server-news"; + var ns = "news:comp.infosystems.www.servers."; + + var config = new WriterConfig(); + config.set(BasicWriterSettings.INLINE_BLANK_NODES, false); + config.set(BasicWriterSettings.PRETTY_PRINT, false); + + var expectedModel = new DynamicModelFactory().createEmptyModel(); + expectedModel.setNamespace(prefix, ns); + expectedModel.setNamespace(RDFS.PREFIX, RDFS.NAMESPACE); + expectedModel.add(vf.createIRI(ns, "unix"), RDFS.LABEL, vf.createLiteral("News on Unix")); + expectedModel.add(vf.createIRI(ns, "windows"), RDFS.LABEL, vf.createLiteral("News on Windows")); + + var turtle1 = new StringWriter(); + Rio.write(expectedModel, turtle1, RDFFormat.TURTLE, config); + + var actualModel = Rio.parse(new StringReader(turtle1.toString()), "", RDFFormat.TURTLE); + assertThat(Models.isomorphic(expectedModel, actualModel)).as("isomorphic").isTrue(); + + var turtle2 = new StringWriter(); + Rio.write(actualModel, turtle2, RDFFormat.TURTLE, config); + + assertThat(turtle2.toString()).isEqualTo(turtle1.toString()); + } + @Test @Disabled public void anotherBnodeTest() throws Exception { From 0e584a44c62324ada5a3e3fb97022e670e56c61e Mon Sep 17 00:00:00 2001 From: Richard Eckart de Castilho Date: Wed, 6 Mar 2024 20:30:42 +0100 Subject: [PATCH 3/4] GH-4921 Turtle writer does not respect namespaces in IRIs - Commented out failing parts of new tests and refer to followup issue --- .../java/org/eclipse/rdf4j/rio/turtle/TurtleWriterTest.java | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/core/rio/turtle/src/test/java/org/eclipse/rdf4j/rio/turtle/TurtleWriterTest.java 
b/core/rio/turtle/src/test/java/org/eclipse/rdf4j/rio/turtle/TurtleWriterTest.java index 2aa752dcd5e..457142d474c 100644 --- a/core/rio/turtle/src/test/java/org/eclipse/rdf4j/rio/turtle/TurtleWriterTest.java +++ b/core/rio/turtle/src/test/java/org/eclipse/rdf4j/rio/turtle/TurtleWriterTest.java @@ -212,7 +212,8 @@ public void testUnusualIrisAndPrefixesParseWriteCompare() throws Exception { var actual = Rio.parse(new StringReader(stringWriter.toString()), "", RDFFormat.TURTLE); assertThat(Models.isomorphic(expected, actual)).as("isomorphic").isTrue(); - assertThat(stringWriter.toString()).isEqualTo(data); + // Requires https://github.com/eclipse-rdf4j/rdf4j/issues/4929 to be fixed + // assertThat(stringWriter.toString()).isEqualTo(data); } @Test @@ -239,7 +240,8 @@ public void testUnusualIrisAndPrefixesWriteParserWriteCompare() throws Exception var turtle2 = new StringWriter(); Rio.write(actualModel, turtle2, RDFFormat.TURTLE, config); - assertThat(turtle2.toString()).isEqualTo(turtle1.toString()); + // Requires https://github.com/eclipse-rdf4j/rdf4j/issues/4929 to be fixed + // assertThat(turtle2.toString()).isEqualTo(turtle1.toString()); } @Test From 10e92446edee8057c292c6b28d879e3961b8568b Mon Sep 17 00:00:00 2001 From: Richard Eckart de Castilho Date: Wed, 6 Mar 2024 21:28:20 +0100 Subject: [PATCH 4/4] GH-4921 Turtle writer does not respect namespaces in IRIs - Fix RDFXML test by checking if a localname preference indicated by the IRI is valid in XML - if not, try to find an alternative representation --- .../eclipse/rdf4j/model/impl/SimpleIRI.java | 2 +- .../rdf4j/rio/rdfxml/RDFXMLWriter.java | 1 + .../rio/rdfxml/util/RDFXMLPrettyWriter.java | 51 ++++++++++++++++--- 3 files changed, 45 insertions(+), 9 deletions(-) diff --git a/core/model/src/main/java/org/eclipse/rdf4j/model/impl/SimpleIRI.java b/core/model/src/main/java/org/eclipse/rdf4j/model/impl/SimpleIRI.java index 7c26e671559..e40b15e714f 100644 --- 
a/core/model/src/main/java/org/eclipse/rdf4j/model/impl/SimpleIRI.java +++ b/core/model/src/main/java/org/eclipse/rdf4j/model/impl/SimpleIRI.java @@ -79,7 +79,7 @@ protected void setIRIString(String namespace, String localname) { Objects.requireNonNull(namespace, "namespace must not be null"); Objects.requireNonNull(localname, "localname must not be null"); - String joinedIriString = namespace + localname; + var joinedIriString = namespace + localname; if (joinedIriString.indexOf(':') < 0) { throw new IllegalArgumentException("Not a valid (absolute) IRI: " + joinedIriString); diff --git a/core/rio/rdfxml/src/main/java/org/eclipse/rdf4j/rio/rdfxml/RDFXMLWriter.java b/core/rio/rdfxml/src/main/java/org/eclipse/rdf4j/rio/rdfxml/RDFXMLWriter.java index 26e5a39c912..01bf1e5e864 100644 --- a/core/rio/rdfxml/src/main/java/org/eclipse/rdf4j/rio/rdfxml/RDFXMLWriter.java +++ b/core/rio/rdfxml/src/main/java/org/eclipse/rdf4j/rio/rdfxml/RDFXMLWriter.java @@ -106,6 +106,7 @@ public Writer getWriter() { return writer; } + @Override public Collection> getSupportedSettings() { final Collection> settings = new HashSet<>(super.getSupportedSettings()); settings.add(BasicWriterSettings.BASE_DIRECTIVE); diff --git a/core/rio/rdfxml/src/main/java/org/eclipse/rdf4j/rio/rdfxml/util/RDFXMLPrettyWriter.java b/core/rio/rdfxml/src/main/java/org/eclipse/rdf4j/rio/rdfxml/util/RDFXMLPrettyWriter.java index 5e6f6fcb54a..487df47dbd1 100644 --- a/core/rio/rdfxml/src/main/java/org/eclipse/rdf4j/rio/rdfxml/util/RDFXMLPrettyWriter.java +++ b/core/rio/rdfxml/src/main/java/org/eclipse/rdf4j/rio/rdfxml/util/RDFXMLPrettyWriter.java @@ -18,8 +18,10 @@ import java.util.Collection; import java.util.HashSet; import java.util.Stack; +import java.util.regex.Pattern; import org.eclipse.rdf4j.common.net.ParsedIRI; +import org.eclipse.rdf4j.common.xml.XMLUtil; import org.eclipse.rdf4j.model.BNode; import org.eclipse.rdf4j.model.IRI; import org.eclipse.rdf4j.model.Literal; @@ -260,8 +262,9 @@ private void 
popStacks(Resource newSubject) throws IOException, RDFHandlerExcept writeIndents(i * 2 - 1); IRI predicate = predicateStack.get(i - 1); + var predicateQName = new QName(predicate); - writeStartTag(predicate.getNamespace(), predicate.getLocalName()); + writeStartTag(predicateQName.getNamespace(), predicateQName.getLocalName()); writeNewLine(); } @@ -281,6 +284,7 @@ private void popStacks(Resource newSubject) throws IOException, RDFHandlerExcept writeNewLine(); } else { IRI topPredicate = predicateStack.pop(); + var topPredicateQName = new QName(topPredicate); if (!topNode.hasType()) { // we can use an abbreviated predicate @@ -291,7 +295,7 @@ private void popStacks(Resource newSubject) throws IOException, RDFHandlerExcept // written out as well writeIndents(nodeStack.size() * 2 - 1); - writeStartTag(topPredicate.getNamespace(), topPredicate.getLocalName()); + writeStartTag(topPredicateQName.getNamespace(), topPredicateQName.getLocalName()); writeNewLine(); // write out an empty subject @@ -300,7 +304,7 @@ private void popStacks(Resource newSubject) throws IOException, RDFHandlerExcept writeNewLine(); writeIndents(nodeStack.size() * 2 - 1); - writeEndTag(topPredicate.getNamespace(), topPredicate.getLocalName()); + writeEndTag(topPredicateQName.getNamespace(), topPredicateQName.getLocalName()); writeNewLine(); } } @@ -322,10 +326,11 @@ private void popStacks(Resource newSubject) throws IOException, RDFHandlerExcept if (predicateStack.size() > 0) { IRI nextPredicate = predicateStack.pop(); + var nextPredicateQName = new QName(nextPredicate); writeIndents(predicateStack.size() + nodeStack.size()); - writeEndTag(nextPredicate.getNamespace(), nextPredicate.getLocalName()); + writeEndTag(nextPredicateQName.getNamespace(), nextPredicateQName.getLocalName()); writeNewLine(); } @@ -392,7 +397,8 @@ private void writeNodeStartOfStartTag(Node node) throws IOException, RDFHandlerE if (node.hasType()) { // We can use abbreviated syntax - 
writeStartOfStartTag(node.getType().getNamespace(), node.getType().getLocalName()); + var nodeTypeQName = new QName(node.getType()); + writeStartOfStartTag(nodeTypeQName.getNamespace(), nodeTypeQName.getLocalName()); } else { // We cannot use abbreviated syntax writeStartOfStartTag(RDF.NAMESPACE, "Description"); @@ -423,7 +429,8 @@ private void writeNodeStartTag(Node node) throws IOException, RDFHandlerExceptio */ private void writeNodeEndTag(Node node) throws IOException { if (node.getType() != null) { - writeEndTag(node.getType().getNamespace(), node.getType().getLocalName()); + var nodeTypeQName = new QName(node.getType()); + writeEndTag(nodeTypeQName.getNamespace(), nodeTypeQName.getLocalName()); } else { writeEndTag(RDF.NAMESPACE, "Description"); } @@ -442,7 +449,8 @@ private void writeNodeEmptyTag(Node node) throws IOException, RDFHandlerExceptio * Write out an empty property element. */ private void writeAbbreviatedPredicate(IRI pred, Value obj) throws IOException, RDFHandlerException { - writeStartOfStartTag(pred.getNamespace(), pred.getLocalName()); + var predQName = new QName(pred); + writeStartOfStartTag(predQName.getNamespace(), predQName.getLocalName()); if (obj instanceof Resource) { Resource objRes = (Resource) obj; @@ -484,7 +492,7 @@ private void writeAbbreviatedPredicate(IRI pred, Value obj) throws IOException, writeCharacterData(objLit.getLabel()); } - writeEndTag(pred.getNamespace(), pred.getLocalName()); + writeEndTag(predQName.getNamespace(), predQName.getLocalName()); } writeNewLine(); @@ -565,4 +573,31 @@ public boolean isWritten() { return isWritten; } } + + private static class QName { + private static final Pattern VALID_XML_ELEMENT_NAME = Pattern.compile("[a-zA-Z_][a-zA-Z0-9_\\-\\.]*"); + + private final String namespace; + private final String localName; + + public QName(IRI resource) { + if (!VALID_XML_ELEMENT_NAME.matcher(resource.getLocalName()).matches()) { + var iriString = resource.getNamespace() + resource.getLocalName(); + var 
sep = XMLUtil.findURISplitIndex(iriString); + namespace = iriString.substring(0, sep); + localName = iriString.substring(sep); + } else { + localName = resource.getLocalName(); + namespace = resource.getNamespace(); + } + } + + public String getLocalName() { + return localName; + } + + public String getNamespace() { + return namespace; + } + } }