Skip to content

Commit

Permalink
GH-4921 Turtle writer does not respect namespaces in IRIs (#4922)
Browse files Browse the repository at this point in the history
  • Loading branch information
hmottestad authored Mar 20, 2024
2 parents 5c0a1b3 + 10e9244 commit 1015439
Show file tree
Hide file tree
Showing 7 changed files with 156 additions and 13 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -67,10 +67,28 @@ protected SimpleIRI(String iriString) {
setIRIString(iriString);
}

/**
 * Creates a new IRI from a separately supplied namespace and local name.
 * <p>
 * The work is delegated to {@link #setIRIString(String, String)}, which joins the two parts and records the
 * boundary between them, so the caller-provided split is kept instead of being re-derived from the joined
 * string.
 *
 * @param namespace the namespace part of the IRI; must not be {@code null}
 * @param localname the local name part of the IRI; must not be {@code null}
 * @throws NullPointerException     if {@code namespace} or {@code localname} is {@code null}
 * @throws IllegalArgumentException if the joined string does not contain a colon
 */
protected SimpleIRI(String namespace, String localname) {
setIRIString(namespace, localname);
}

/*---------*
* Methods *
*---------*/

/**
 * Sets this IRI from an explicit namespace and local name.
 * <p>
 * The two parts are concatenated to form the full IRI string, and the local name index is set to the length of
 * the namespace, so the split point is exactly the one the caller supplied.
 *
 * @param namespace the namespace part of the IRI; must not be {@code null}
 * @param localname the local name part of the IRI; must not be {@code null}
 * @throws NullPointerException     if {@code namespace} or {@code localname} is {@code null}
 * @throws IllegalArgumentException if the concatenated string contains no colon and therefore cannot be an
 *                                  absolute IRI
 */
protected void setIRIString(String namespace, String localname) {
    Objects.requireNonNull(namespace, "namespace must not be null");
    Objects.requireNonNull(localname, "localname must not be null");

    final String fullIri = namespace.concat(localname);

    // An absolute IRI always has a scheme, so at least one colon must be present.
    if (!fullIri.contains(":")) {
        throw new IllegalArgumentException("Not a valid (absolute) IRI: " + fullIri);
    }

    this.iriString = fullIri;
    this.localNameIdx = namespace.length();
}

protected void setIRIString(String iriString) {
Objects.requireNonNull(iriString, "iriString must not be null");

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ public IRI createIRI(String iri) {

/**
 * Creates an IRI from a namespace and a local name, preserving the split between the two.
 * <p>
 * The IRI is built with the two-argument {@code SimpleIRI} constructor rather than by concatenating the parts
 * and re-parsing the result, so the namespace and local name handed in by the caller are kept as given.
 *
 * @param namespace the namespace part of the IRI; must not be {@code null}
 * @param localName the local name part of the IRI; must not be {@code null}
 * @return an IRI whose string value is {@code namespace + localName}
 * @throws NullPointerException     if either argument is {@code null}
 * @throws IllegalArgumentException if the joined string does not contain a colon
 */
@Override
public IRI createIRI(String namespace, String localName) {
    return new SimpleIRI(namespace, localName);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ public Writer getWriter() {
return writer;
}

@Override
public Collection<RioSetting<?>> getSupportedSettings() {
final Collection<RioSetting<?>> settings = new HashSet<>(super.getSupportedSettings());
settings.add(BasicWriterSettings.BASE_DIRECTIVE);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,10 @@
import java.util.Collection;
import java.util.HashSet;
import java.util.Stack;
import java.util.regex.Pattern;

import org.eclipse.rdf4j.common.net.ParsedIRI;
import org.eclipse.rdf4j.common.xml.XMLUtil;
import org.eclipse.rdf4j.model.BNode;
import org.eclipse.rdf4j.model.IRI;
import org.eclipse.rdf4j.model.Literal;
Expand Down Expand Up @@ -260,8 +262,9 @@ private void popStacks(Resource newSubject) throws IOException, RDFHandlerExcept
writeIndents(i * 2 - 1);

IRI predicate = predicateStack.get(i - 1);
var predicateQName = new QName(predicate);

writeStartTag(predicate.getNamespace(), predicate.getLocalName());
writeStartTag(predicateQName.getNamespace(), predicateQName.getLocalName());
writeNewLine();
}

Expand All @@ -281,6 +284,7 @@ private void popStacks(Resource newSubject) throws IOException, RDFHandlerExcept
writeNewLine();
} else {
IRI topPredicate = predicateStack.pop();
var topPredicateQName = new QName(topPredicate);

if (!topNode.hasType()) {
// we can use an abbreviated predicate
Expand All @@ -291,7 +295,7 @@ private void popStacks(Resource newSubject) throws IOException, RDFHandlerExcept
// written out as well

writeIndents(nodeStack.size() * 2 - 1);
writeStartTag(topPredicate.getNamespace(), topPredicate.getLocalName());
writeStartTag(topPredicateQName.getNamespace(), topPredicateQName.getLocalName());
writeNewLine();

// write out an empty subject
Expand All @@ -300,7 +304,7 @@ private void popStacks(Resource newSubject) throws IOException, RDFHandlerExcept
writeNewLine();

writeIndents(nodeStack.size() * 2 - 1);
writeEndTag(topPredicate.getNamespace(), topPredicate.getLocalName());
writeEndTag(topPredicateQName.getNamespace(), topPredicateQName.getLocalName());
writeNewLine();
}
}
Expand All @@ -322,10 +326,11 @@ private void popStacks(Resource newSubject) throws IOException, RDFHandlerExcept

if (predicateStack.size() > 0) {
IRI nextPredicate = predicateStack.pop();
var nextPredicateQName = new QName(nextPredicate);

writeIndents(predicateStack.size() + nodeStack.size());

writeEndTag(nextPredicate.getNamespace(), nextPredicate.getLocalName());
writeEndTag(nextPredicateQName.getNamespace(), nextPredicateQName.getLocalName());

writeNewLine();
}
Expand Down Expand Up @@ -392,7 +397,8 @@ private void writeNodeStartOfStartTag(Node node) throws IOException, RDFHandlerE

if (node.hasType()) {
// We can use abbreviated syntax
writeStartOfStartTag(node.getType().getNamespace(), node.getType().getLocalName());
var nodeTypeQName = new QName(node.getType());
writeStartOfStartTag(nodeTypeQName.getNamespace(), nodeTypeQName.getLocalName());
} else {
// We cannot use abbreviated syntax
writeStartOfStartTag(RDF.NAMESPACE, "Description");
Expand Down Expand Up @@ -423,7 +429,8 @@ private void writeNodeStartTag(Node node) throws IOException, RDFHandlerExceptio
*/
private void writeNodeEndTag(Node node) throws IOException {
if (node.getType() != null) {
writeEndTag(node.getType().getNamespace(), node.getType().getLocalName());
var nodeTypeQName = new QName(node.getType());
writeEndTag(nodeTypeQName.getNamespace(), nodeTypeQName.getLocalName());
} else {
writeEndTag(RDF.NAMESPACE, "Description");
}
Expand All @@ -442,7 +449,8 @@ private void writeNodeEmptyTag(Node node) throws IOException, RDFHandlerExceptio
* Write out an empty property element.
*/
private void writeAbbreviatedPredicate(IRI pred, Value obj) throws IOException, RDFHandlerException {
writeStartOfStartTag(pred.getNamespace(), pred.getLocalName());
var predQName = new QName(pred);
writeStartOfStartTag(predQName.getNamespace(), predQName.getLocalName());

if (obj instanceof Resource) {
Resource objRes = (Resource) obj;
Expand Down Expand Up @@ -484,7 +492,7 @@ private void writeAbbreviatedPredicate(IRI pred, Value obj) throws IOException,
writeCharacterData(objLit.getLabel());
}

writeEndTag(pred.getNamespace(), pred.getLocalName());
writeEndTag(predQName.getNamespace(), predQName.getLocalName());
}

writeNewLine();
Expand Down Expand Up @@ -565,4 +573,31 @@ public boolean isWritten() {
return isWritten;
}
}

/**
 * Namespace / local-name pair derived from an IRI for use as an XML element name.
 * <p>
 * If the IRI's own local name matches {@link #VALID_XML_ELEMENT_NAME}, the IRI's namespace and local name are
 * used unchanged. Otherwise the full IRI string is re-split with {@code XMLUtil.findURISplitIndex}.
 */
private static class QName {
    /** A letter or underscore followed by any number of letters, digits, underscores, hyphens or dots. */
    private static final Pattern VALID_XML_ELEMENT_NAME = Pattern.compile("[a-zA-Z_][a-zA-Z0-9_\\-\\.]*");

    private final String namespace;
    private final String localName;

    /**
     * Derives the namespace and local name to use for the given IRI.
     *
     * @param resource the IRI to split; must not be {@code null}
     */
    public QName(IRI resource) {
        // Read each part once; both are needed on every path below.
        String iriNamespace = resource.getNamespace();
        String iriLocalName = resource.getLocalName();

        String resolvedNamespace = iriNamespace;
        String resolvedLocalName = iriLocalName;

        if (!VALID_XML_ELEMENT_NAME.matcher(iriLocalName).matches()) {
            String iriString = iriNamespace + iriLocalName;
            int sep = XMLUtil.findURISplitIndex(iriString);

            // findURISplitIndex is documented to return -1 when the string cannot be split; calling
            // substring with that value would throw StringIndexOutOfBoundsException. In that case keep
            // the IRI's own split instead of failing here.
            if (sep >= 0) {
                resolvedNamespace = iriString.substring(0, sep);
                resolvedLocalName = iriString.substring(sep);
            }
        }

        namespace = resolvedNamespace;
        localName = resolvedLocalName;
    }

    /**
     * @return the local name to use as the XML element's local part
     */
    public String getLocalName() {
        return localName;
    }

    /**
     * @return the namespace to use for the XML element
     */
    public String getNamespace() {
        return namespace;
    }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -459,6 +459,20 @@ public static String encodeURIString(String s) {
return s;
}

/**
 * Tests whether the supplied string consists of one code point accepted by {@code isPN_CHARS_BASE} followed by
 * zero or more code points accepted by {@code isPN_CHARS}.
 *
 * @param s the string to test; may be {@code null}
 * @return {@code true} if the string is non-empty and every code point passes the checks described above;
 *         {@code false} for {@code null}, the empty string, or any string containing a rejected code point
 */
public static boolean isValidPrefixedName(String s) {
    if (s == null || s.isEmpty()) {
        return false;
    }

    final int firstCodePoint = s.codePointAt(0);
    if (!isPN_CHARS_BASE(firstCodePoint)) {
        return false;
    }

    // Walk the remaining code points, stepping over surrogate pairs as a single unit.
    int index = Character.charCount(firstCodePoint);
    while (index < s.length()) {
        final int codePoint = s.codePointAt(index);
        if (!TurtleUtil.isPN_CHARS(codePoint)) {
            return false;
        }
        index += Character.charCount(codePoint);
    }

    return true;
}

/**
* Decodes an encoded Turtle string. Any \-escape sequences are substituted with their decoded value.
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -537,11 +537,20 @@ protected void writeResource(Resource res, boolean canShorten) throws IOExceptio
}

protected void writeURI(IRI uri) throws IOException {
String uriString = uri.toString();

// Try to find a prefix for the URI's namespace
String prefix = null;
if (TurtleUtil.isValidPrefixedName(uri.getLocalName())) {
prefix = namespaceTable.get(uri.getNamespace());
if (prefix != null) {
// Namespace is mapped to a prefix; write abbreviated URI
writer.write(prefix);
writer.write(":");
writer.write(uri.getLocalName());
return;
}
}

// Try to find a prefix for the URI's namespace
String uriString = uri.toString();
int splitIdx = TurtleUtil.findURISplitIndex(uriString);
if (splitIdx > 0) {
String namespace = uriString.substring(0, splitIdx);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,17 @@
*******************************************************************************/
package org.eclipse.rdf4j.rio.turtle;

import static org.assertj.core.api.Assertions.assertThat;
import static org.junit.jupiter.api.Assertions.assertTrue;

import java.io.StringReader;
import java.io.StringWriter;

import org.eclipse.rdf4j.model.IRI;
import org.eclipse.rdf4j.model.Model;
import org.eclipse.rdf4j.model.impl.DynamicModelFactory;
import org.eclipse.rdf4j.model.util.Models;
import org.eclipse.rdf4j.model.vocabulary.RDFS;
import org.eclipse.rdf4j.rio.RDFFormat;
import org.eclipse.rdf4j.rio.Rio;
import org.eclipse.rdf4j.rio.WriterConfig;
Expand Down Expand Up @@ -95,7 +98,6 @@ public void testBlankNodeInlining1() throws Exception {
Model actual = Rio.parse(new StringReader(stringWriter.toString()), "", RDFFormat.TURTLE);

assertTrue(Models.isomorphic(expected, actual));

}

@Test
Expand Down Expand Up @@ -192,6 +194,56 @@ public void testNoBuffering() throws Exception {
assertTrue(Models.isomorphic(expected, actual));
}

/**
 * Parses Turtle that uses a prefix for a non-hierarchical ({@code news:}) namespace, writes the resulting model
 * back out as Turtle, re-parses the output and checks that both models are isomorphic.
 */
@Test
public void testUnusualIrisAndPrefixesParseWriteCompare() throws Exception {
    String data = "@prefix server-news: <news:comp.infosystems.www.servers.> .\n" +
            "@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .\n" +
            "server-news:unix rdfs:label \"News on Unix\" .\n" +
            "server-news:windows rdfs:label \"News on Windows\" .\n";

    Model parsedInput = Rio.parse(new StringReader(data), "", RDFFormat.TURTLE);

    WriterConfig writerConfig = new WriterConfig();
    writerConfig.set(BasicWriterSettings.INLINE_BLANK_NODES, false);
    writerConfig.set(BasicWriterSettings.PRETTY_PRINT, false);

    StringWriter turtleOut = new StringWriter();
    Rio.write(parsedInput, turtleOut, RDFFormat.TURTLE, writerConfig);

    StringReader turtleIn = new StringReader(turtleOut.toString());
    Model reparsed = Rio.parse(turtleIn, "", RDFFormat.TURTLE);
    assertThat(Models.isomorphic(parsedInput, reparsed)).as("isomorphic").isTrue();

    // Requires https://github.com/eclipse-rdf4j/rdf4j/issues/4929 to be fixed
    // assertThat(stringWriter.toString()).isEqualTo(data);
}

/**
 * Builds a model whose subjects are created from an explicit namespace and local name under a non-hierarchical
 * ({@code news:}) namespace, writes it as Turtle, re-parses the output and checks that the parsed model is
 * isomorphic to the original. The parsed model is then written out a second time.
 */
@Test
public void testUnusualIrisAndPrefixesWriteParserWriteCompare() throws Exception {
    String prefix = "server-news";
    String ns = "news:comp.infosystems.www.servers.";

    WriterConfig writerConfig = new WriterConfig();
    writerConfig.set(BasicWriterSettings.INLINE_BLANK_NODES, false);
    writerConfig.set(BasicWriterSettings.PRETTY_PRINT, false);

    Model original = new DynamicModelFactory().createEmptyModel();
    original.setNamespace(prefix, ns);
    original.setNamespace(RDFS.PREFIX, RDFS.NAMESPACE);
    original.add(vf.createIRI(ns, "unix"), RDFS.LABEL, vf.createLiteral("News on Unix"));
    original.add(vf.createIRI(ns, "windows"), RDFS.LABEL, vf.createLiteral("News on Windows"));

    StringWriter firstPass = new StringWriter();
    Rio.write(original, firstPass, RDFFormat.TURTLE, writerConfig);

    StringReader firstPassReader = new StringReader(firstPass.toString());
    Model roundTripped = Rio.parse(firstPassReader, "", RDFFormat.TURTLE);
    assertThat(Models.isomorphic(original, roundTripped)).as("isomorphic").isTrue();

    // The second write must still complete without error, even though its output is not compared yet.
    StringWriter secondPass = new StringWriter();
    Rio.write(roundTripped, secondPass, RDFFormat.TURTLE, writerConfig);

    // Requires https://github.com/eclipse-rdf4j/rdf4j/issues/4929 to be fixed
    // assertThat(turtle2.toString()).isEqualTo(turtle1.toString());
}

@Test
@Disabled
public void anotherBnodeTest() throws Exception {
Expand Down Expand Up @@ -672,6 +724,20 @@ public void testBlankNodeInlining_indirectCircularReferenceWithIRI() throws Exce
assertTrue(Models.isomorphic(expected, actual));
}

/**
 * Checks that IRIs created from an explicit namespace and local name are abbreviated with the prefix registered
 * for that namespace when the model is written as Turtle.
 */
@Test
public void testIriNamespace() throws Exception {
    var prefix = "foo-bar";
    var ns = "foo:this.is.my.bar.";

    Model model = new DynamicModelFactory().createEmptyModel();
    model.setNamespace(prefix, ns);

    IRI subject = vf.createIRI(ns, "lala");
    IRI predicate = vf.createIRI(ns, "lulu");
    IRI object = vf.createIRI(ns, "lolo");
    model.add(subject, predicate, object);

    var turtleOut = new StringWriter();
    Rio.write(model, turtleOut, RDFFormat.TURTLE);

    String written = turtleOut.toString();
    assertThat(written).contains("foo-bar:lala foo-bar:lulu foo-bar:lolo .");
}

@Test
public void testIgnoreAbbreviateNumbers() throws Exception {
StringWriter sw = new StringWriter();
Expand Down

0 comments on commit 1015439

Please sign in to comment.