Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

GH-5148 Introduce "soft fail" for corrupt ValueStore #5150

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
import org.eclipse.rdf4j.model.vocabulary.XSD;
import org.eclipse.rdf4j.sail.SailException;
import org.eclipse.rdf4j.sail.nativerdf.datastore.DataStore;
import org.eclipse.rdf4j.sail.nativerdf.model.CorruptValue;
import org.eclipse.rdf4j.sail.nativerdf.model.NativeBNode;
import org.eclipse.rdf4j.sail.nativerdf.model.NativeIRI;
import org.eclipse.rdf4j.sail.nativerdf.model.NativeLiteral;
Expand Down Expand Up @@ -123,6 +124,11 @@ public class ValueStore extends SimpleValueFactory {
*/
private final ConcurrentCache<String, Integer> namespaceIDCache;

/**
* Do not throw an exception in case a value cannot be loaded, e.g. due to a corrupt value store.
*/
private final boolean softFailOnCorruptData;

/*--------------*
* Constructors *
*--------------*/
Expand All @@ -146,6 +152,15 @@ public ValueStore(File dataDir, boolean forceSync, int valueCacheSize, int value
namespaceIDCache = new ConcurrentCache<>(namespaceIDCacheSize);

setNewRevision();

/*
* Soft failure when a ValueStore is corrupt (i.e., one or more NativeValues cannot be read properly) can be
* enabled using the system property org.eclipse.rdf4j.sail.nativerdf.softFailOnCorruptData (boolean). The
* default behavior is that ValueStore will fail hard with a SailException, whereas softFailOnCorruptData set
* to true will make ValueStore return instances of CorruptValue if NativeValue cannot be read.
*/
this.softFailOnCorruptData = "true"
.equalsIgnoreCase(System.getProperty("org.eclipse.rdf4j.sail.nativerdf.softFailOnCorruptData"));
}

/*---------*
Expand Down Expand Up @@ -526,6 +541,12 @@ private boolean isNamespaceData(byte[] data) {
}

private NativeValue data2value(int id, byte[] data) throws IOException {
if (data.length == 0) {
if (softFailOnCorruptData) {
return new CorruptValue(revision, id);
}
throw new SailException("Empty data array for value with id " + id);
}
switch (data[0]) {
case URI_VALUE:
return data2uri(id, data);
Expand All @@ -534,7 +555,10 @@ private NativeValue data2value(int id, byte[] data) throws IOException {
case LITERAL_VALUE:
return data2literal(id, data);
default:
throw new IllegalArgumentException("Invalid type " + data[0] + " for value with id " + id);
if (softFailOnCorruptData) {
return new CorruptValue(revision, id);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could potentially include data[0] at this point.

}
throw new SailException("Invalid type " + data[0] + " for value with id " + id);
}
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
/*******************************************************************************
* Copyright (c) 2024 Eclipse RDF4J contributors, Aduna, and others.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

*
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Distribution License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/org/documents/edl-v10.php.
*
* SPDX-License-Identifier: BSD-3-Clause
*******************************************************************************/
package org.eclipse.rdf4j.sail.nativerdf.model;

import org.eclipse.rdf4j.sail.nativerdf.ValueStoreRevision;

/**
 * CorruptValue stands in for a NativeValue that could not be read from the ValueStore while soft failure is enabled
 * (see {@code ValueStore#softFailOnCorruptData}).
 * <p>
 * There is no method isCorruptValue() as it would exist for a "regular" implementation of NativeValue. Since
 * CorruptValue is only to be used in exceptional situations, the recommended way of checking for it is using
 * "instanceof".
 * <p>
 * Two distinct instances are equal only if both carry a known internal ID (i.e. not
 * {@link NativeValue#UNKNOWN_ID}), their revisions are equal, and their internal IDs match. {@link #hashCode()} is
 * derived from the internal ID so that it stays consistent with {@link #equals(Object)}.
 *
 * @author Hannes Ebner
 */
public class CorruptValue implements NativeValue {

    /*-----------*
     * Constants *
     *-----------*/

    private static final long serialVersionUID = 8829067881854394802L;

    /*-----------*
     * Variables *
     *-----------*/

    private volatile ValueStoreRevision revision;

    private volatile int internalID;

    /*--------------*
     * Constructors *
     *--------------*/

    /**
     * Creates a placeholder for the value with the given internal ID.
     *
     * @param revision   the value store revision the ID belongs to
     * @param internalID the internal ID of the value that could not be read
     */
    public CorruptValue(ValueStoreRevision revision, int internalID) {
        // Assign the fields directly instead of calling the overridable setInternalID(..), so that a subclass
        // override never runs against a partially constructed instance.
        this.internalID = internalID;
        this.revision = revision;
    }

    /*---------*
     * Methods *
     *---------*/

    /**
     * Replaces the internal ID and the revision of this value.
     *
     * @param internalID the new internal ID
     * @param revision   the new value store revision
     */
    @Override
    public void setInternalID(int internalID, ValueStoreRevision revision) {
        this.internalID = internalID;
        this.revision = revision;
    }

    /**
     * @return the value store revision this value was created or last updated with
     */
    @Override
    public ValueStoreRevision getValueStoreRevision() {
        return revision;
    }

    /**
     * @return the internal ID of the value that could not be read
     */
    @Override
    public int getInternalID() {
        return internalID;
    }

    /**
     * @return the decimal string representation of the internal ID; the actual value content is not available
     */
    public String stringValue() {
        return Integer.toString(internalID);
    }

    /**
     * Compares this value with another object.
     *
     * @param o the object to compare with
     * @return {@code true} if {@code o} is this very instance, or if {@code o} is a CorruptValue with an equal
     *         revision and the same internal ID, where both IDs are known; {@code false} otherwise
     */
    @Override
    public boolean equals(Object o) {
        if (this == o) {
            return true;
        }

        if (!(o instanceof CorruptValue)) {
            return false;
        }

        CorruptValue other = (CorruptValue) o;

        // Read the volatile fields once so that the checks below operate on a consistent snapshot.
        int thisID = internalID;
        int otherID = other.internalID;
        ValueStoreRevision thisRevision = revision;

        if (thisID == NativeValue.UNKNOWN_ID || otherID == NativeValue.UNKNOWN_ID) {
            // Without a known ID on both sides only identity counts, and that was ruled out above.
            return false;
        }

        // A missing revision cannot be matched against another store revision; treat it as "not equal" instead of
        // failing with a NullPointerException.
        return thisRevision != null && thisRevision.equals(other.revision) && thisID == otherID;
    }

    /**
     * Returns a hash code based on the internal ID. Instances that are equal according to {@link #equals(Object)}
     * always share the same internal ID and therefore the same hash code. Note that the hash code changes when
     * {@link #setInternalID(int, ValueStoreRevision)} assigns a different ID.
     *
     * @return the internal ID
     */
    @Override
    public int hashCode() {
        return internalID;
    }

}
Loading