Skip to content

Commit

Permalink
GH-5148 corrupt data can be written as NQuads
Browse files Browse the repository at this point in the history
  • Loading branch information
hmottestad committed Oct 23, 2024
1 parent 196cf9d commit 0c58aac
Show file tree
Hide file tree
Showing 6 changed files with 244 additions and 15 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
import org.eclipse.rdf4j.sail.nativerdf.model.CorruptIRI;
import org.eclipse.rdf4j.sail.nativerdf.model.CorruptIRIOrBNode;
import org.eclipse.rdf4j.sail.nativerdf.model.CorruptLiteral;
import org.eclipse.rdf4j.sail.nativerdf.model.CorruptUnknownValue;
import org.eclipse.rdf4j.sail.nativerdf.model.CorruptValue;
import org.eclipse.rdf4j.sail.nativerdf.model.NativeBNode;
import org.eclipse.rdf4j.sail.nativerdf.model.NativeIRI;
Expand Down Expand Up @@ -225,10 +226,12 @@ public <T extends NativeValue & Resource> T getResource(int id) throws IOExcepti

NativeValue resultValue = getValue(id);

if (!(resultValue instanceof Resource)) {
if (resultValue != null && !(resultValue instanceof Resource)) {
if (SOFT_FAIL_ON_CORRUPT_DATA && resultValue instanceof CorruptValue) {
return (T) new CorruptIRIOrBNode(revision, id, ((CorruptValue) resultValue).getData());
}
logger.warn(
"Possible corrupt data consider setting the system property org.eclipse.rdf4j.sail.nativerdf.softFailOnCorruptData to true");
}

return (T) resultValue;
Expand All @@ -245,10 +248,15 @@ public <T extends NativeValue & IRI> T getIRI(int id) throws IOException {

NativeValue resultValue = getValue(id);

if (!(resultValue instanceof Resource)) {
if (resultValue != null && !(resultValue instanceof IRI)) {
if (SOFT_FAIL_ON_CORRUPT_DATA && resultValue instanceof CorruptValue) {
return (T) new CorruptIRIOrBNode(revision, id, ((CorruptValue) resultValue).getData());
if (resultValue instanceof CorruptIRI) {
return (T) resultValue;
}
return (T) new CorruptIRI(revision, id, null, ((CorruptValue) resultValue).getData());
}
logger.warn(
"Possible corrupt data consider setting the system property org.eclipse.rdf4j.sail.nativerdf.softFailOnCorruptData to true");
}

return (T) resultValue;
Expand Down Expand Up @@ -584,9 +592,10 @@ private NativeValue data2value(int id, byte[] data) throws IOException {
if (data.length == 0) {
if (SOFT_FAIL_ON_CORRUPT_DATA) {
logger.error("Soft fail on corrupt data: Empty data array for value with id {}", id);
return new CorruptValue(revision, id, data);
return new CorruptUnknownValue(revision, id, data);
}
throw new SailException("Empty data array for value with id " + id);
throw new SailException("Empty data array for value with id " + id
+ " consider setting the system property org.eclipse.rdf4j.sail.nativerdf.softFailOnCorruptData to true");
}
switch (data[0]) {
case URI_VALUE:
Expand All @@ -598,24 +607,29 @@ private NativeValue data2value(int id, byte[] data) throws IOException {
default:
if (SOFT_FAIL_ON_CORRUPT_DATA) {
logger.error("Soft fail on corrupt data: Invalid type {} for value with id {}", data[0], id);
return new CorruptValue(revision, id, data);
return new CorruptUnknownValue(revision, id, data);
}
throw new SailException("Invalid type " + data[0] + " for value with id " + id);
throw new SailException("Invalid type " + data[0] + " for value with id " + id
+ " consider setting the system property org.eclipse.rdf4j.sail.nativerdf.softFailOnCorruptData to true");
}
}

private <T extends IRI & NativeValue> T data2uri(int id, byte[] data) throws IOException {
String namespace = null;

try {
int nsID = ByteArrayUtil.getInt(data, 1);
String namespace = getNamespace(nsID);
namespace = getNamespace(nsID);

String localName = new String(data, 5, data.length - 5, StandardCharsets.UTF_8);

return (T) new NativeIRI(revision, namespace, localName, id);
} catch (Throwable e) {
if (SOFT_FAIL_ON_CORRUPT_DATA && (e instanceof Exception || e instanceof AssertionError)) {
return (T) new CorruptIRI(revision, id, data);
return (T) new CorruptIRI(revision, id, namespace, data);
}
logger.error(
"Possible corrupt data consider setting the system property org.eclipse.rdf4j.sail.nativerdf.softFailOnCorruptData to true");
throw e;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,14 @@

package org.eclipse.rdf4j.sail.nativerdf.model;

import java.nio.charset.StandardCharsets;

import org.apache.commons.codec.binary.Hex;
import org.eclipse.rdf4j.model.IRI;
import org.eclipse.rdf4j.sail.nativerdf.ValueStoreRevision;

import com.google.common.net.UrlEscapers;

/**
* CorruptIRI is used when a NativeValue cannot be read from the ValueStore and if soft failure is enabled (see
* ValueStore#softFailOnCorruptData).
Expand All @@ -23,22 +28,48 @@
public class CorruptIRI extends CorruptValue implements IRI {

private static final long serialVersionUID = -6995615243794525852L;
private final String namespace;

public CorruptIRI(ValueStoreRevision revision, int internalID, byte[] data) {
public CorruptIRI(ValueStoreRevision revision, int internalID, String namespace, byte[] data) {
super(revision, internalID, data);
this.namespace = namespace;
}

/**
 * Renders this value using exactly the text that {@link #stringValue()} produces.
 *
 * @return the result of {@link #stringValue()}
 */
@Override
public String toString() {
	return this.stringValue();
}

/**
 * Builds the textual form of this corrupt IRI from its namespace and local name.
 * <p>
 * The two parts are concatenated directly, without an additional separator. The fallback namespace
 * ({@code urn:CorruptIRI:}) already ends with its delimiter, so inserting another {@code ':'} would yield a string
 * such as {@code urn:CorruptIRI::CORRUPT_x} that no longer splits back into the values reported by
 * {@link #getNamespace()} and {@link #getLocalName()}.
 * <p>
 * NOTE(review): a namespace recovered from the value store is presumably stored with its trailing delimiter as
 * well - confirm against the namespace store.
 *
 * @return the namespace followed by the local name, or {@code CorruptIRI_with_ID_<internal id>} if either part
 *         could not be computed
 */
public String stringValue() {
	try {
		return getNamespace() + getLocalName();
	} catch (Throwable ignored) {
		// Best effort: this value stands in for unreadable data, so any failure while assembling the string
		// falls through to the ID-based placeholder below instead of propagating.
	}

	return "CorruptIRI_with_ID_" + getInternalID();
}

@Override
public String getNamespace() {
return "CORRUPT";
if (namespace != null && !namespace.isEmpty()) {
return namespace;
}
return "urn:CorruptIRI:";
}

/**
 * Derives a local name from the raw bytes kept for this corrupt value.
 * <p>
 * When data is present and shorter than 1024 bytes, everything from byte offset 5 onwards is decoded as UTF-8,
 * escaped as a URL path segment and prefixed with {@code CORRUPT_}. If that attempt throws (for instance because
 * fewer than five bytes are available), the complete byte array is hex-encoded instead. Missing or larger data
 * yields the plain marker {@code CORRUPT}.
 * <p>
 * NOTE(review): the offset of 5 presumably skips a type marker and a namespace id, mirroring how the value store
 * decodes IRIs - confirm.
 *
 * @return a local name starting with {@code CORRUPT}
 */
@Override
public String getLocalName() {
	final byte[] raw = getData();
	if (raw == null || raw.length >= 1024) {
		return "CORRUPT";
	}

	try {
		final String decoded = new String(raw, 5, raw.length - 5, StandardCharsets.UTF_8);
		return "CORRUPT_" + UrlEscapers.urlPathSegmentEscaper().escape(decoded);
	} catch (Throwable ignored) {
		// decoding or escaping failed; use the hex rendering below
	}

	return "CORRUPT_" + Hex.encodeHexString(raw);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,15 @@

package org.eclipse.rdf4j.sail.nativerdf.model;

import java.nio.charset.StandardCharsets;

import org.apache.commons.codec.binary.Hex;
import org.eclipse.rdf4j.model.BNode;
import org.eclipse.rdf4j.model.IRI;
import org.eclipse.rdf4j.sail.nativerdf.ValueStoreRevision;

import com.google.common.net.UrlEscapers;

/**
* CorruptIRIOrBNode is used when a NativeValue cannot be read from the ValueStore and if soft failure is enabled (see
* ValueStore#softFailOnCorruptData).
Expand All @@ -29,17 +34,38 @@ public CorruptIRIOrBNode(ValueStoreRevision revision, int internalID, byte[] dat
super(revision, internalID, data);
}

/**
 * Renders this value using exactly the text that {@link #stringValue()} produces.
 *
 * @return the result of {@link #stringValue()}
 */
@Override
public String toString() {
	return this.stringValue();
}

public String stringValue() {
return "CorruptIRI_with_ID_" + getInternalID();
try {
return getNamespace() + ":" + getLocalName();
} catch (Throwable ignored) {
}

return "CorruptIRIOrBNode_with_ID_" + getInternalID();
}

@Override
public String getNamespace() {
return "CORRUPT";
return "urn:CorruptIRIOrBNode:";
}

/**
 * Derives a local name from the raw bytes kept for this corrupt value.
 * <p>
 * When data is present and shorter than 1024 bytes, everything from byte offset 5 onwards is decoded as UTF-8,
 * escaped as a URL path segment and prefixed with {@code CORRUPT_}. If that attempt throws (for instance because
 * fewer than five bytes are available), the complete byte array is hex-encoded instead. Missing or larger data
 * yields the plain marker {@code CORRUPT}.
 * <p>
 * NOTE(review): the offset of 5 presumably skips a type marker and a namespace id, mirroring how the value store
 * decodes IRIs - confirm.
 *
 * @return a local name starting with {@code CORRUPT}
 */
@Override
public String getLocalName() {
	final byte[] raw = getData();
	if (raw == null || raw.length >= 1024) {
		return "CORRUPT";
	}

	try {
		final String decoded = new String(raw, 5, raw.length - 5, StandardCharsets.UTF_8);
		return "CORRUPT_" + UrlEscapers.urlPathSegmentEscaper().escape(decoded);
	} catch (Throwable ignored) {
		// decoding or escaping failed; use the hex rendering below
	}

	return "CORRUPT_" + Hex.encodeHexString(raw);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,15 @@

import java.math.BigDecimal;
import java.math.BigInteger;
import java.nio.charset.StandardCharsets;
import java.util.Optional;

import javax.xml.datatype.XMLGregorianCalendar;

import org.eclipse.rdf4j.model.IRI;
import org.eclipse.rdf4j.model.Literal;
import org.eclipse.rdf4j.model.base.CoreDatatype;
import org.eclipse.rdf4j.model.util.Values;
import org.eclipse.rdf4j.sail.nativerdf.ValueStoreRevision;

/**
Expand All @@ -32,6 +34,8 @@ public class CorruptLiteral extends CorruptValue implements Literal {

private static final long serialVersionUID = -2510885288827542623L;

private static final IRI CORRUPT = Values.iri("urn:corrupt");

/**
 * Creates a corrupt literal placeholder. All three arguments are passed unchanged to the {@code CorruptValue}
 * superclass constructor.
 *
 * @param revision   the value store revision handed to the superclass
 * @param internalID the internal ID handed to the superclass
 * @param data       the raw bytes handed to the superclass
 */
public CorruptLiteral(ValueStoreRevision revision, int internalID, byte[] data) {
super(revision, internalID, data);
}
Expand All @@ -42,7 +46,15 @@ public String stringValue() {

@Override
public String getLabel() {
return "";
byte[] data = getData();
try {
if (data != null && data.length < 1024) {
return "CorruptUnknownValue with ID " + getInternalID() + " with possible data: "
+ new String(data, StandardCharsets.UTF_8);
}
} catch (Throwable ignored) {
}
return "CorruptUnknownValue_with_ID_" + getInternalID();
}

@Override
Expand All @@ -52,7 +64,7 @@ public Optional<String> getLanguage() {

@Override
public IRI getDatatype() {
return null;
return CORRUPT;
}

@Override
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
/*******************************************************************************
* Copyright (c) 2024 Eclipse RDF4J contributors.
*
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Distribution License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/org/documents/edl-v10.php.
*
* SPDX-License-Identifier: BSD-3-Clause
******************************************************************************/

package org.eclipse.rdf4j.sail.nativerdf.model;

import java.math.BigDecimal;
import java.math.BigInteger;
import java.nio.charset.StandardCharsets;
import java.util.Optional;

import javax.xml.datatype.XMLGregorianCalendar;

import org.eclipse.rdf4j.model.IRI;
import org.eclipse.rdf4j.model.Literal;
import org.eclipse.rdf4j.model.base.CoreDatatype;
import org.eclipse.rdf4j.model.vocabulary.XSD;
import org.eclipse.rdf4j.sail.nativerdf.ValueStoreRevision;

/**
* CorruptUnknownValue is used when a NativeValue cannot be read from the ValueStore and if soft failure is enabled (see
* ValueStore#softFailOnCorruptData). Since a type is needed
*
* @author Håvard M. Ottestad
*/
public class CorruptUnknownValue extends CorruptValue implements Literal {

private static final long serialVersionUID = -6650510290226676279L;

public CorruptUnknownValue(ValueStoreRevision revision, int internalID, byte[] data) {
super(revision, internalID, data);
}

@Override
public String getLabel() {
byte[] data = getData();
try {
if (data != null && data.length < 1024) {
return "CorruptUnknownValue with ID " + getInternalID() + " with possible data: "
+ new String(data, StandardCharsets.UTF_8);
}
} catch (Throwable ignored) {
}
return "CorruptUnknownValue_with_ID_" + getInternalID();
}

@Override
public Optional<String> getLanguage() {
return Optional.empty();
}

@Override
public IRI getDatatype() {
return XSD.STRING;
}

@Override
public boolean booleanValue() {
return false;
}

@Override
public byte byteValue() {
return 0;
}

@Override
public short shortValue() {
return 0;
}

@Override
public int intValue() {
return 0;
}

@Override
public long longValue() {
return 0;
}

@Override
public BigInteger integerValue() {
return null;
}

@Override
public BigDecimal decimalValue() {
return null;
}

@Override
public float floatValue() {
return 0;
}

@Override
public double doubleValue() {
return 0;
}

@Override
public XMLGregorianCalendar calendarValue() {
return null;
}

@Override
public CoreDatatype getCoreDatatype() {
return null;
}

@Override
public boolean equals(Object o) {
if (this == o) {
return true;
}

if (o instanceof CorruptUnknownValue && getInternalID() != NativeValue.UNKNOWN_ID) {
CorruptUnknownValue otherCorruptValue = (CorruptUnknownValue) o;

if (otherCorruptValue.getInternalID() != NativeValue.UNKNOWN_ID
&& getValueStoreRevision().equals(otherCorruptValue.getValueStoreRevision())) {
// CorruptValue is from the same revision of the same native store with both IDs set
return getInternalID() == otherCorruptValue.getInternalID();
}
}

return super.equals(o);
}

}
Loading

0 comments on commit 0c58aac

Please sign in to comment.