From 475d09e49993dffdb209071dccc111979b84739f Mon Sep 17 00:00:00 2001 From: Hannes Ebner Date: Thu, 10 Oct 2024 10:05:29 +0200 Subject: [PATCH 1/2] GH-5148 Introduce "soft fail" for corrupt ValueStore --- .../rdf4j/sail/nativerdf/ValueStore.java | 26 +++++- .../sail/nativerdf/model/CorruptValue.java | 91 +++++++++++++++++++ 2 files changed, 116 insertions(+), 1 deletion(-) create mode 100644 core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/model/CorruptValue.java diff --git a/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/ValueStore.java b/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/ValueStore.java index 7193c7e8342..401d8705ff1 100644 --- a/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/ValueStore.java +++ b/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/ValueStore.java @@ -33,6 +33,7 @@ import org.eclipse.rdf4j.model.vocabulary.XSD; import org.eclipse.rdf4j.sail.SailException; import org.eclipse.rdf4j.sail.nativerdf.datastore.DataStore; +import org.eclipse.rdf4j.sail.nativerdf.model.CorruptValue; import org.eclipse.rdf4j.sail.nativerdf.model.NativeBNode; import org.eclipse.rdf4j.sail.nativerdf.model.NativeIRI; import org.eclipse.rdf4j.sail.nativerdf.model.NativeLiteral; @@ -123,6 +124,11 @@ public class ValueStore extends SimpleValueFactory { */ private final ConcurrentCache namespaceIDCache; + /** + * Do not throw an exception in case a value cannot be loaded, e.g. due to a corrupt value store. 
+ */ + private final boolean softFailOnCorruptData; + /*--------------* * Constructors * *--------------*/ @@ -146,6 +152,15 @@ public ValueStore(File dataDir, boolean forceSync, int valueCacheSize, int value namespaceIDCache = new ConcurrentCache<>(namespaceIDCacheSize); setNewRevision(); + + /* + * Soft failure when a ValueStore is corrupt (i.e., one or more NativeValues cannot be read properly) can be + * enabled using the system property org.eclipse.rdf4j.sail.nativerdf.softFailOnCorruptData (boolean). The + * default behavior is that ValueStore will fail hard with a SailException, whereas softFailOnCorruptData set + * to true will make ValueStore return instances of CorruptValue if NativeValue cannot be read. + */ + this.softFailOnCorruptData = "true" + .equalsIgnoreCase(System.getProperty("org.eclipse.rdf4j.sail.nativerdf.softFailOnCorruptData")); } /*---------* @@ -526,6 +541,12 @@ private boolean isNamespaceData(byte[] data) { } private NativeValue data2value(int id, byte[] data) throws IOException { + if (data.length == 0) { + if (softFailOnCorruptData) { + return new CorruptValue(revision, id); + } + throw new SailException("Empty data array for value with id " + id); + } switch (data[0]) { case URI_VALUE: return data2uri(id, data); @@ -534,7 +555,10 @@ private NativeValue data2value(int id, byte[] data) throws IOException { case LITERAL_VALUE: return data2literal(id, data); default: - throw new IllegalArgumentException("Invalid type " + data[0] + " for value with id " + id); + if (softFailOnCorruptData) { + return new CorruptValue(revision, id); + } + throw new SailException("Invalid type " + data[0] + " for value with id " + id); } } diff --git a/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/model/CorruptValue.java b/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/model/CorruptValue.java new file mode 100644 index 00000000000..ec713a38360 --- /dev/null +++ 
b/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/model/CorruptValue.java @@ -0,0 +1,91 @@ +/******************************************************************************* + * Copyright (c) 2024 Eclipse RDF4J contributors, Aduna, and others. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +package org.eclipse.rdf4j.sail.nativerdf.model; + +import org.eclipse.rdf4j.sail.nativerdf.ValueStoreRevision; + +/** + * CorruptValue is used when a NativeValue cannot be read from the ValueStore and if soft failure is enabled (see + * ValueStore#softFailOnCorruptData). + * + * There is no method isCorruptValue() is it would exist for a "regular" implementation of NativeValue. Since + * CorruptValue is only to be used in exceptional situations, the recommended way of checking for it is using + * "instanceof". 
+ * + * @author Hannes Ebner + */ +public class CorruptValue implements NativeValue { + + /*-----------* + * Constants * + *-----------*/ + + private static final long serialVersionUID = 8829067881854394802L; + + /*----------* + * Variables * + *----------*/ + + private volatile ValueStoreRevision revision; + + private volatile int internalID; + + /*--------------* + * Constructors * + *--------------*/ + + public CorruptValue(ValueStoreRevision revision, int internalID) { + setInternalID(internalID, revision); + } + + /*---------* + * Methods * + *---------*/ + + @Override + public void setInternalID(int internalID, ValueStoreRevision revision) { + this.internalID = internalID; + this.revision = revision; + } + + @Override + public ValueStoreRevision getValueStoreRevision() { + return revision; + } + + @Override + public int getInternalID() { + return internalID; + } + + public String stringValue() { + return Integer.toString(internalID); + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + + if (o instanceof CorruptValue && internalID != NativeValue.UNKNOWN_ID) { + CorruptValue otherCorruptValue = (CorruptValue) o; + + if (otherCorruptValue.internalID != NativeValue.UNKNOWN_ID && revision.equals(otherCorruptValue.revision)) { + // CorruptValue is from the same revision of the same native store with both IDs set + return internalID == otherCorruptValue.internalID; + } + } + + return super.equals(o); + } + +} \ No newline at end of file From 029df5242d99173e7be5c79cdd67d581d32d81fe Mon Sep 17 00:00:00 2001 From: Hannes Ebner Date: Thu, 10 Oct 2024 10:31:15 +0200 Subject: [PATCH 2/2] GH-5148 Fixed typo --- .../org/eclipse/rdf4j/sail/nativerdf/model/CorruptValue.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/model/CorruptValue.java b/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/model/CorruptValue.java index 
ec713a38360..933bf9f6149 100644 --- a/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/model/CorruptValue.java +++ b/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/model/CorruptValue.java @@ -16,7 +16,7 @@ * CorruptValue is used when a NativeValue cannot be read from the ValueStore and if soft failure is enabled (see * ValueStore#softFailOnCorruptData). * - * There is no method isCorruptValue() is it would exist for a "regular" implementation of NativeValue. Since + * There is no method isCorruptValue() as it would exist for a "regular" implementation of NativeValue. Since * CorruptValue is only to be used in exceptional situations, the recommended way of checking for it is using * "instanceof". *