From 475d09e49993dffdb209071dccc111979b84739f Mon Sep 17 00:00:00 2001 From: Hannes Ebner Date: Thu, 10 Oct 2024 10:05:29 +0200 Subject: [PATCH 01/12] GH-5148 Introduce "soft fail" for corrupt ValueStore --- .../rdf4j/sail/nativerdf/ValueStore.java | 26 +++++- .../sail/nativerdf/model/CorruptValue.java | 91 +++++++++++++++++++ 2 files changed, 116 insertions(+), 1 deletion(-) create mode 100644 core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/model/CorruptValue.java diff --git a/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/ValueStore.java b/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/ValueStore.java index 7193c7e8342..401d8705ff1 100644 --- a/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/ValueStore.java +++ b/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/ValueStore.java @@ -33,6 +33,7 @@ import org.eclipse.rdf4j.model.vocabulary.XSD; import org.eclipse.rdf4j.sail.SailException; import org.eclipse.rdf4j.sail.nativerdf.datastore.DataStore; +import org.eclipse.rdf4j.sail.nativerdf.model.CorruptValue; import org.eclipse.rdf4j.sail.nativerdf.model.NativeBNode; import org.eclipse.rdf4j.sail.nativerdf.model.NativeIRI; import org.eclipse.rdf4j.sail.nativerdf.model.NativeLiteral; @@ -123,6 +124,11 @@ public class ValueStore extends SimpleValueFactory { */ private final ConcurrentCache namespaceIDCache; + /** + * Do not throw an exception in case a value cannot be loaded, e.g. due to a corrupt value store. 
+ */ + private final boolean softFailOnCorruptData; + /*--------------* * Constructors * *--------------*/ @@ -146,6 +152,15 @@ public ValueStore(File dataDir, boolean forceSync, int valueCacheSize, int value namespaceIDCache = new ConcurrentCache<>(namespaceIDCacheSize); setNewRevision(); + + /* + * Soft failure when a ValueStore is corrupt (i.e., one or more NativeValues cannot be read properly) can be + * enabled using the system property org.eclipse.rdf4j.sail.nativerdf.softFailOnCorruptData (boolean). The + * default behavior is that ValueStore will fail hard with a SailException, whereas softFaileOnCorruptData set + * to true will make ValueStore return instances of CorruptValue if NativeValue cannot be read. + */ + this.softFailOnCorruptData = "true" + .equalsIgnoreCase(System.getProperty("org.eclipse.rdf4j.sail.nativerdf.softFailOnCorruptData")); } /*---------* @@ -526,6 +541,12 @@ private boolean isNamespaceData(byte[] data) { } private NativeValue data2value(int id, byte[] data) throws IOException { + if (data.length == 0) { + if (softFailOnCorruptData) { + return new CorruptValue(revision, id); + } + throw new SailException("Empty data array for value with id " + id); + } switch (data[0]) { case URI_VALUE: return data2uri(id, data); @@ -534,7 +555,10 @@ private NativeValue data2value(int id, byte[] data) throws IOException { case LITERAL_VALUE: return data2literal(id, data); default: - throw new IllegalArgumentException("Invalid type " + data[0] + " for value with id " + id); + if (softFailOnCorruptData) { + return new CorruptValue(revision, id); + } + throw new SailException("Invalid type " + data[0] + " for value with id " + id); } } diff --git a/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/model/CorruptValue.java b/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/model/CorruptValue.java new file mode 100644 index 00000000000..ec713a38360 --- /dev/null +++ 
b/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/model/CorruptValue.java @@ -0,0 +1,91 @@ +/******************************************************************************* + * Copyright (c) 2024 Eclipse RDF4J contributors, Aduna, and others. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +package org.eclipse.rdf4j.sail.nativerdf.model; + +import org.eclipse.rdf4j.sail.nativerdf.ValueStoreRevision; + +/** + * CorruptValue is used when a NativeValue cannot be read from the ValueStore and if soft failure is enabled (see + * ValueStore#softFailOnCorruptData). + * + * There is no method isCorruptValue() is it would exist for a "regular" implementation of NativeValue. Since + * CorruptValue is only to be used in exceptional situations, the recommended way of checking for it is using + * "instanceof". 
+ * + * @author Hannes Ebner + */ +public class CorruptValue implements NativeValue { + + /*-----------* + * Constants * + *-----------*/ + + private static final long serialVersionUID = 8829067881854394802L; + + /*----------* + * Variables * + *----------*/ + + private volatile ValueStoreRevision revision; + + private volatile int internalID; + + /*--------------* + * Constructors * + *--------------*/ + + public CorruptValue(ValueStoreRevision revision, int internalID) { + setInternalID(internalID, revision); + } + + /*---------* + * Methods * + *---------*/ + + @Override + public void setInternalID(int internalID, ValueStoreRevision revision) { + this.internalID = internalID; + this.revision = revision; + } + + @Override + public ValueStoreRevision getValueStoreRevision() { + return revision; + } + + @Override + public int getInternalID() { + return internalID; + } + + public String stringValue() { + return Integer.toString(internalID); + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + + if (o instanceof CorruptValue && internalID != NativeValue.UNKNOWN_ID) { + CorruptValue otherCorruptValue = (CorruptValue) o; + + if (otherCorruptValue.internalID != NativeValue.UNKNOWN_ID && revision.equals(otherCorruptValue.revision)) { + // CorruptValue is from the same revision of the same native store with both IDs set + return internalID == otherCorruptValue.internalID; + } + } + + return super.equals(o); + } + +} \ No newline at end of file From 029df5242d99173e7be5c79cdd67d581d32d81fe Mon Sep 17 00:00:00 2001 From: Hannes Ebner Date: Thu, 10 Oct 2024 10:31:15 +0200 Subject: [PATCH 02/12] GH-5148 Fixed typo --- .../org/eclipse/rdf4j/sail/nativerdf/model/CorruptValue.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/model/CorruptValue.java b/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/model/CorruptValue.java index 
ec713a38360..933bf9f6149 100644 --- a/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/model/CorruptValue.java +++ b/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/model/CorruptValue.java @@ -16,7 +16,7 @@ * CorruptValue is used when a NativeValue cannot be read from the ValueStore and if soft failure is enabled (see * ValueStore#softFailOnCorruptData). * - * There is no method isCorruptValue() is it would exist for a "regular" implementation of NativeValue. Since + * There is no method isCorruptValue() as it would exist for a "regular" implementation of NativeValue. Since * CorruptValue is only to be used in exceptional situations, the recommended way of checking for it is using * "instanceof". * From cad4af9966883dddf9323bd401b76b16a57bbfaa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ha=CC=8Avard=20Ottestad?= Date: Wed, 23 Oct 2024 11:36:36 +0200 Subject: [PATCH 03/12] GH-5148 fixes based on review --- .../rdf4j/sail/nativerdf/ValueStore.java | 4 +- .../sail/nativerdf/model/CorruptValue.java | 42 +++++++++---------- 2 files changed, 21 insertions(+), 25 deletions(-) diff --git a/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/ValueStore.java b/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/ValueStore.java index 401d8705ff1..d24541e3d1f 100644 --- a/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/ValueStore.java +++ b/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/ValueStore.java @@ -543,7 +543,7 @@ private boolean isNamespaceData(byte[] data) { private NativeValue data2value(int id, byte[] data) throws IOException { if (data.length == 0) { if (softFailOnCorruptData) { - return new CorruptValue(revision, id); + return new CorruptValue(revision, id, data); } throw new SailException("Empty data array for value with id " + id); } @@ -556,7 +556,7 @@ private NativeValue data2value(int id, byte[] data) throws IOException { return data2literal(id, data); 
default: if (softFailOnCorruptData) { - return new CorruptValue(revision, id); + return new CorruptValue(revision, id, data); } throw new SailException("Invalid type " + data[0] + " for value with id " + id); } diff --git a/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/model/CorruptValue.java b/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/model/CorruptValue.java index 933bf9f6149..53a3576e443 100644 --- a/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/model/CorruptValue.java +++ b/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/model/CorruptValue.java @@ -1,5 +1,5 @@ /******************************************************************************* - * Copyright (c) 2024 Eclipse RDF4J contributors, Aduna, and others. + * Copyright (c) 2024 Eclipse RDF4J contributors. * * All rights reserved. This program and the accompanying materials * are made available under the terms of the Eclipse Distribution License v1.0 @@ -7,7 +7,8 @@ * http://www.eclipse.org/org/documents/edl-v10.php. * * SPDX-License-Identifier: BSD-3-Clause - *******************************************************************************/ + ******************************************************************************/ + package org.eclipse.rdf4j.sail.nativerdf.model; import org.eclipse.rdf4j.sail.nativerdf.ValueStoreRevision; @@ -15,7 +16,7 @@ /** * CorruptValue is used when a NativeValue cannot be read from the ValueStore and if soft failure is enabled (see * ValueStore#softFailOnCorruptData). - * + *

* There is no method isCorruptValue() as it would exist for a "regular" implementation of NativeValue. Since * CorruptValue is only to be used in exceptional situations, the recommended way of checking for it is using * "instanceof". @@ -24,32 +25,17 @@ */ public class CorruptValue implements NativeValue { - /*-----------* - * Constants * - *-----------*/ - private static final long serialVersionUID = 8829067881854394802L; - /*----------* - * Variables * - *----------*/ - + private final byte[] data; private volatile ValueStoreRevision revision; - private volatile int internalID; - /*--------------* - * Constructors * - *--------------*/ - - public CorruptValue(ValueStoreRevision revision, int internalID) { + public CorruptValue(ValueStoreRevision revision, int internalID, byte[] data) { setInternalID(internalID, revision); + this.data = data; } - /*---------* - * Methods * - *---------*/ - @Override public void setInternalID(int internalID, ValueStoreRevision revision) { this.internalID = internalID; @@ -67,7 +53,17 @@ public int getInternalID() { } public String stringValue() { - return Integer.toString(internalID); + return "CorruptValue_with_ID_" + internalID; + } + + /** + * Returns the bytes that were read from the ValueStore for this value's internalID. Since the value is corrupt the + * data may be null or an empty array. 
+ * + * @return null, empty array or corrupt data + */ + public byte[] getData() { + return data; } @Override @@ -88,4 +84,4 @@ public boolean equals(Object o) { return super.equals(o); } -} \ No newline at end of file +} From 196cf9d5bf48e0a3dcafc3dcd728c50f8754a712 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ha=CC=8Avard=20Ottestad?= Date: Wed, 23 Oct 2024 13:24:42 +0200 Subject: [PATCH 04/12] GH-5148 add tests and extend corruption handling to more parts of the code --- .../nativerdf/NativeStatementIterator.java | 6 +- .../rdf4j/sail/nativerdf/ValueStore.java | 142 +++++++---- .../sail/nativerdf/model/CorruptIRI.java | 64 +++++ .../nativerdf/model/CorruptIRIOrBNode.java | 70 ++++++ .../sail/nativerdf/model/CorruptLiteral.java | 132 +++++++++++ .../NativeSailStoreCorruptionTest.java | 224 ++++++++++++++++++ .../documentation/programming/repository.md | 3 + 7 files changed, 596 insertions(+), 45 deletions(-) create mode 100644 core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/model/CorruptIRI.java create mode 100644 core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/model/CorruptIRIOrBNode.java create mode 100644 core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/model/CorruptLiteral.java create mode 100644 core/sail/nativerdf/src/test/java/org/eclipse/rdf4j/sail/nativerdf/NativeSailStoreCorruptionTest.java diff --git a/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/NativeStatementIterator.java b/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/NativeStatementIterator.java index 6d8c84cfa9c..b8776bdaf83 100644 --- a/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/NativeStatementIterator.java +++ b/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/NativeStatementIterator.java @@ -61,10 +61,10 @@ public Statement getNextElement() throws SailException { } int subjID = ByteArrayUtil.getInt(nextValue, TripleStore.SUBJ_IDX); - Resource subj = 
(Resource) valueStore.getValue(subjID); + Resource subj = valueStore.getResource(subjID); int predID = ByteArrayUtil.getInt(nextValue, TripleStore.PRED_IDX); - IRI pred = (IRI) valueStore.getValue(predID); + IRI pred = valueStore.getIRI(predID); int objID = ByteArrayUtil.getInt(nextValue, TripleStore.OBJ_IDX); Value obj = valueStore.getValue(objID); @@ -72,7 +72,7 @@ public Statement getNextElement() throws SailException { Resource context = null; int contextID = ByteArrayUtil.getInt(nextValue, TripleStore.CONTEXT_IDX); if (contextID != 0) { - context = (Resource) valueStore.getValue(contextID); + context = valueStore.getResource(contextID); } return valueStore.createStatement(subj, pred, obj, context); diff --git a/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/ValueStore.java b/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/ValueStore.java index d24541e3d1f..c3f7a835c50 100644 --- a/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/ValueStore.java +++ b/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/ValueStore.java @@ -33,12 +33,17 @@ import org.eclipse.rdf4j.model.vocabulary.XSD; import org.eclipse.rdf4j.sail.SailException; import org.eclipse.rdf4j.sail.nativerdf.datastore.DataStore; +import org.eclipse.rdf4j.sail.nativerdf.model.CorruptIRI; +import org.eclipse.rdf4j.sail.nativerdf.model.CorruptIRIOrBNode; +import org.eclipse.rdf4j.sail.nativerdf.model.CorruptLiteral; import org.eclipse.rdf4j.sail.nativerdf.model.CorruptValue; import org.eclipse.rdf4j.sail.nativerdf.model.NativeBNode; import org.eclipse.rdf4j.sail.nativerdf.model.NativeIRI; import org.eclipse.rdf4j.sail.nativerdf.model.NativeLiteral; import org.eclipse.rdf4j.sail.nativerdf.model.NativeResource; import org.eclipse.rdf4j.sail.nativerdf.model.NativeValue; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * File-based indexed storage and retrieval of RDF values. 
ValueStore maps RDF values to integer IDs and vice-versa. @@ -50,9 +55,7 @@ @InternalUseOnly public class ValueStore extends SimpleValueFactory { - /*-----------* - * Constants * - *-----------*/ + private static final Logger logger = LoggerFactory.getLogger(ValueStore.class); /** * The default value cache size. @@ -127,7 +130,8 @@ public class ValueStore extends SimpleValueFactory { /** * Do not throw an exception in case a value cannot be loaded, e.g. due to a corrupt value store. */ - private final boolean softFailOnCorruptData; + public static boolean SOFT_FAIL_ON_CORRUPT_DATA = "true" + .equalsIgnoreCase(System.getProperty("org.eclipse.rdf4j.sail.nativerdf.softFailOnCorruptData"));; /*--------------* * Constructors * @@ -153,14 +157,6 @@ public ValueStore(File dataDir, boolean forceSync, int valueCacheSize, int value setNewRevision(); - /* - * Soft failure when a ValueStore is corrupt (i.e., one or more NativeValues cannot be read properly) can be - * enabled using the system property org.eclipse.rdf4j.sail.nativerdf.softFailOnCorruptData (boolean). The - * default behavior is that ValueStore will fail hard with a SailException, whereas softFaileOnCorruptData set - * to true will make ValueStore return instances of CorruptValue if NativeValue cannot be read. - */ - this.softFailOnCorruptData = "true" - .equalsIgnoreCase(System.getProperty("org.eclipse.rdf4j.sail.nativerdf.softFailOnCorruptData")); } /*---------* @@ -195,6 +191,7 @@ public Lock getReadLock() throws InterruptedException { * @throws IOException If an I/O error occurred. 
*/ public NativeValue getValue(int id) throws IOException { + // Check value cache Integer cacheID = id; NativeValue resultValue = valueCache.get(cacheID); @@ -206,12 +203,55 @@ public NativeValue getValue(int id) throws IOException { if (data != null) { resultValue = data2value(id, data); - // Store value in cache - valueCache.put(cacheID, resultValue); + if (!(resultValue instanceof CorruptValue)) { + // Store value in cache + valueCache.put(cacheID, resultValue); + } } } return resultValue; + + } + + /** + * Gets the Resource for the specified ID. + * + * @param id A value ID. + * @return The Resource for the ID, or null no such value could be found. + * @throws IOException If an I/O error occurred. + */ + public T getResource(int id) throws IOException { + + NativeValue resultValue = getValue(id); + + if (!(resultValue instanceof Resource)) { + if (SOFT_FAIL_ON_CORRUPT_DATA && resultValue instanceof CorruptValue) { + return (T) new CorruptIRIOrBNode(revision, id, ((CorruptValue) resultValue).getData()); + } + } + + return (T) resultValue; + } + + /** + * Gets the IRI for the specified ID. + * + * @param id A value ID. + * @return The IRI for the ID, or null no such value could be found. + * @throws IOException If an I/O error occurred. 
+ */ + public T getIRI(int id) throws IOException { + + NativeValue resultValue = getValue(id); + + if (!(resultValue instanceof Resource)) { + if (SOFT_FAIL_ON_CORRUPT_DATA && resultValue instanceof CorruptValue) { + return (T) new CorruptIRIOrBNode(revision, id, ((CorruptValue) resultValue).getData()); + } + } + + return (T) resultValue; } /** @@ -542,7 +582,8 @@ private boolean isNamespaceData(byte[] data) { private NativeValue data2value(int id, byte[] data) throws IOException { if (data.length == 0) { - if (softFailOnCorruptData) { + if (SOFT_FAIL_ON_CORRUPT_DATA) { + logger.error("Soft fail on corrupt data: Empty data array for value with id {}", id); return new CorruptValue(revision, id, data); } throw new SailException("Empty data array for value with id " + id); @@ -555,20 +596,29 @@ private NativeValue data2value(int id, byte[] data) throws IOException { case LITERAL_VALUE: return data2literal(id, data); default: - if (softFailOnCorruptData) { + if (SOFT_FAIL_ON_CORRUPT_DATA) { + logger.error("Soft fail on corrupt data: Invalid type {} for value with id {}", data[0], id); return new CorruptValue(revision, id, data); } throw new SailException("Invalid type " + data[0] + " for value with id " + id); } } - private NativeIRI data2uri(int id, byte[] data) throws IOException { - int nsID = ByteArrayUtil.getInt(data, 1); - String namespace = getNamespace(nsID); + private T data2uri(int id, byte[] data) throws IOException { + try { + int nsID = ByteArrayUtil.getInt(data, 1); + String namespace = getNamespace(nsID); + + String localName = new String(data, 5, data.length - 5, StandardCharsets.UTF_8); - String localName = new String(data, 5, data.length - 5, StandardCharsets.UTF_8); + return (T) new NativeIRI(revision, namespace, localName, id); + } catch (Throwable e) { + if (SOFT_FAIL_ON_CORRUPT_DATA && (e instanceof Exception || e instanceof AssertionError)) { + return (T) new CorruptIRI(revision, id, data); + } + throw e; + } - return new NativeIRI(revision, 
namespace, localName, id); } private NativeBNode data2bnode(int id, byte[] data) { @@ -576,31 +626,39 @@ private NativeBNode data2bnode(int id, byte[] data) { return new NativeBNode(revision, nodeID, id); } - private NativeLiteral data2literal(int id, byte[] data) throws IOException { - // Get datatype - int datatypeID = ByteArrayUtil.getInt(data, 1); - IRI datatype = null; - if (datatypeID != NativeValue.UNKNOWN_ID) { - datatype = (IRI) getValue(datatypeID); - } + private T data2literal(int id, byte[] data) throws IOException { + try { + // Get datatype + int datatypeID = ByteArrayUtil.getInt(data, 1); + IRI datatype = null; + if (datatypeID != NativeValue.UNKNOWN_ID) { + datatype = (IRI) getValue(datatypeID); + } - // Get language tag - String lang = null; - int langLength = data[5]; - if (langLength > 0) { - lang = new String(data, 6, langLength, StandardCharsets.UTF_8); - } + // Get language tag + String lang = null; + int langLength = data[5]; + if (langLength > 0) { + lang = new String(data, 6, langLength, StandardCharsets.UTF_8); + } - // Get label - String label = new String(data, 6 + langLength, data.length - 6 - langLength, StandardCharsets.UTF_8); + // Get label + String label = new String(data, 6 + langLength, data.length - 6 - langLength, StandardCharsets.UTF_8); - if (lang != null) { - return new NativeLiteral(revision, label, lang, id); - } else if (datatype != null) { - return new NativeLiteral(revision, label, datatype, id); - } else { - return new NativeLiteral(revision, label, CoreDatatype.XSD.STRING, id); + if (lang != null) { + return (T) new NativeLiteral(revision, label, lang, id); + } else if (datatype != null) { + return (T) new NativeLiteral(revision, label, datatype, id); + } else { + return (T) new NativeLiteral(revision, label, CoreDatatype.XSD.STRING, id); + } + } catch (Throwable e) { + if (SOFT_FAIL_ON_CORRUPT_DATA && (e instanceof Exception || e instanceof AssertionError)) { + return (T) new CorruptLiteral(revision, id, data); + } + 
throw e; } + } private String data2namespace(byte[] data) { diff --git a/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/model/CorruptIRI.java b/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/model/CorruptIRI.java new file mode 100644 index 00000000000..2a0f633a19f --- /dev/null +++ b/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/model/CorruptIRI.java @@ -0,0 +1,64 @@ +/******************************************************************************* + * Copyright (c) 2024 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + ******************************************************************************/ + +package org.eclipse.rdf4j.sail.nativerdf.model; + +import org.eclipse.rdf4j.model.IRI; +import org.eclipse.rdf4j.sail.nativerdf.ValueStoreRevision; + +/** + * CorruptIRI is used when a NativeValue cannot be read from the ValueStore and if soft failure is enabled (see + * ValueStore#softFailOnCorruptData). + * + * @author Håvard M. 
Ottestad + */ +public class CorruptIRI extends CorruptValue implements IRI { + + private static final long serialVersionUID = -6995615243794525852L; + + public CorruptIRI(ValueStoreRevision revision, int internalID, byte[] data) { + super(revision, internalID, data); + } + + public String stringValue() { + return "CorruptIRI_with_ID_" + getInternalID(); + } + + @Override + public String getNamespace() { + return "CORRUPT"; + } + + @Override + public String getLocalName() { + return "CORRUPT"; + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + + if (o instanceof CorruptIRI && getInternalID() != NativeValue.UNKNOWN_ID) { + CorruptIRI otherCorruptValue = (CorruptIRI) o; + + if (otherCorruptValue.getInternalID() != NativeValue.UNKNOWN_ID + && getValueStoreRevision().equals(otherCorruptValue.getValueStoreRevision())) { + // CorruptValue is from the same revision of the same native store with both IDs set + return getInternalID() == otherCorruptValue.getInternalID(); + } + } + + return super.equals(o); + } + +} diff --git a/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/model/CorruptIRIOrBNode.java b/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/model/CorruptIRIOrBNode.java new file mode 100644 index 00000000000..f06c6ad0164 --- /dev/null +++ b/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/model/CorruptIRIOrBNode.java @@ -0,0 +1,70 @@ +/******************************************************************************* + * Copyright (c) 2024 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. 
+ * + * SPDX-License-Identifier: BSD-3-Clause + ******************************************************************************/ + +package org.eclipse.rdf4j.sail.nativerdf.model; + +import org.eclipse.rdf4j.model.BNode; +import org.eclipse.rdf4j.model.IRI; +import org.eclipse.rdf4j.sail.nativerdf.ValueStoreRevision; + +/** + * CorruptIRIOrBNode is used when a NativeValue cannot be read from the ValueStore and if soft failure is enabled (see + * ValueStore#softFailOnCorruptData). + * + * @author Håvard M. Ottestad + */ +public class CorruptIRIOrBNode extends CorruptValue implements IRI, BNode { + + private static final long serialVersionUID = 3709784393454516043L; + + public CorruptIRIOrBNode(ValueStoreRevision revision, int internalID, byte[] data) { + super(revision, internalID, data); + } + + public String stringValue() { + return "CorruptIRI_with_ID_" + getInternalID(); + } + + @Override + public String getNamespace() { + return "CORRUPT"; + } + + @Override + public String getLocalName() { + return "CORRUPT"; + } + + @Override + public String getID() { + return ""; + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + + if (o instanceof CorruptIRIOrBNode && getInternalID() != NativeValue.UNKNOWN_ID) { + CorruptIRIOrBNode otherCorruptValue = (CorruptIRIOrBNode) o; + + if (otherCorruptValue.getInternalID() != NativeValue.UNKNOWN_ID + && getValueStoreRevision().equals(otherCorruptValue.getValueStoreRevision())) { + // CorruptValue is from the same revision of the same native store with both IDs set + return getInternalID() == otherCorruptValue.getInternalID(); + } + } + + return super.equals(o); + } + +} diff --git a/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/model/CorruptLiteral.java b/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/model/CorruptLiteral.java new file mode 100644 index 00000000000..00d3f8cc91e --- /dev/null +++ 
b/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/model/CorruptLiteral.java @@ -0,0 +1,132 @@ +/******************************************************************************* + * Copyright (c) 2024 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + ******************************************************************************/ + +package org.eclipse.rdf4j.sail.nativerdf.model; + +import java.math.BigDecimal; +import java.math.BigInteger; +import java.util.Optional; + +import javax.xml.datatype.XMLGregorianCalendar; + +import org.eclipse.rdf4j.model.IRI; +import org.eclipse.rdf4j.model.Literal; +import org.eclipse.rdf4j.model.base.CoreDatatype; +import org.eclipse.rdf4j.sail.nativerdf.ValueStoreRevision; + +/** + * CorruptLiteral is used when a NativeValue cannot be read from the ValueStore and if soft failure is enabled (see + * ValueStore#softFailOnCorruptData). + * + * @author Håvard M. 
Ottestad + */ +public class CorruptLiteral extends CorruptValue implements Literal { + + private static final long serialVersionUID = -2510885288827542623L; + + public CorruptLiteral(ValueStoreRevision revision, int internalID, byte[] data) { + super(revision, internalID, data); + } + + public String stringValue() { + return "CorruptLiteral_with_ID_" + getInternalID(); + } + + @Override + public String getLabel() { + return ""; + } + + @Override + public Optional getLanguage() { + return Optional.empty(); + } + + @Override + public IRI getDatatype() { + return null; + } + + @Override + public boolean booleanValue() { + return false; + } + + @Override + public byte byteValue() { + return 0; + } + + @Override + public short shortValue() { + return 0; + } + + @Override + public int intValue() { + return 0; + } + + @Override + public long longValue() { + return 0; + } + + @Override + public BigInteger integerValue() { + return null; + } + + @Override + public BigDecimal decimalValue() { + return null; + } + + @Override + public float floatValue() { + return 0; + } + + @Override + public double doubleValue() { + return 0; + } + + @Override + public XMLGregorianCalendar calendarValue() { + return null; + } + + @Override + public CoreDatatype getCoreDatatype() { + return null; + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + + if (o instanceof CorruptLiteral && getInternalID() != NativeValue.UNKNOWN_ID) { + CorruptLiteral otherCorruptValue = (CorruptLiteral) o; + + if (otherCorruptValue.getInternalID() != NativeValue.UNKNOWN_ID + && getValueStoreRevision().equals(otherCorruptValue.getValueStoreRevision())) { + // CorruptValue is from the same revision of the same native store with both IDs set + return getInternalID() == otherCorruptValue.getInternalID(); + } + } + + return super.equals(o); + } + +} diff --git a/core/sail/nativerdf/src/test/java/org/eclipse/rdf4j/sail/nativerdf/NativeSailStoreCorruptionTest.java 
b/core/sail/nativerdf/src/test/java/org/eclipse/rdf4j/sail/nativerdf/NativeSailStoreCorruptionTest.java new file mode 100644 index 00000000000..ba8be3038c6 --- /dev/null +++ b/core/sail/nativerdf/src/test/java/org/eclipse/rdf4j/sail/nativerdf/NativeSailStoreCorruptionTest.java @@ -0,0 +1,224 @@ +/******************************************************************************* + * Copyright (c) 2024 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + ******************************************************************************/ + +package org.eclipse.rdf4j.sail.nativerdf; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +import java.io.File; +import java.io.IOException; +import java.io.RandomAccessFile; +import java.nio.file.Files; +import java.nio.file.StandardCopyOption; +import java.util.ArrayList; +import java.util.List; + +import org.eclipse.rdf4j.model.IRI; +import org.eclipse.rdf4j.model.Statement; +import org.eclipse.rdf4j.model.ValueFactory; +import org.eclipse.rdf4j.model.impl.SimpleValueFactory; +import org.eclipse.rdf4j.model.util.Values; +import org.eclipse.rdf4j.model.vocabulary.RDF; +import org.eclipse.rdf4j.model.vocabulary.RDFS; +import org.eclipse.rdf4j.repository.Repository; +import org.eclipse.rdf4j.repository.RepositoryConnection; +import org.eclipse.rdf4j.repository.RepositoryResult; +import org.eclipse.rdf4j.repository.sail.SailRepository; +import org.jetbrains.annotations.NotNull; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Tests how the NativeStore 
handles corruption in the data files. + */ +public class NativeSailStoreCorruptionTest { + + private static final Logger logger = LoggerFactory.getLogger(NativeSailStoreCorruptionTest.class); + + @TempDir + File tempFolder; + + protected Repository repo; + + protected final ValueFactory F = SimpleValueFactory.getInstance(); + + private File dataDir; + + @BeforeEach + public void before() throws IOException { + this.dataDir = new File(tempFolder, "dbmodel"); + dataDir.mkdir(); + repo = new SailRepository(new NativeStore(dataDir, "spoc,posc")); + repo.init(); + + IRI CTX_1 = F.createIRI("urn:one"); + IRI CTX_2 = F.createIRI("urn:two"); + + Statement S0 = F.createStatement(F.createIRI("http://example.org/a0"), RDFS.LABEL, F.createLiteral("zero")); + Statement S1 = F.createStatement(F.createIRI("http://example.org/b1"), RDFS.LABEL, F.createLiteral("one")); + Statement S2 = F.createStatement(F.createIRI("http://example.org/c2"), RDFS.LABEL, F.createLiteral("two")); + Statement S3 = F.createStatement(Values.bnode(), RDF.TYPE, Values.bnode()); + Statement S4 = F.createStatement(F.createIRI("http://example.org/c2"), RDFS.LABEL, + F.createLiteral("two", "en")); + Statement S5 = F.createStatement(F.createIRI("http://example.org/c2"), RDFS.LABEL, F.createLiteral(1.2)); + + try (RepositoryConnection conn = repo.getConnection()) { + conn.add(S0); + conn.add(S1, CTX_1); + conn.add(S2, CTX_2); + conn.add(S2, CTX_2); + conn.add(S3, CTX_2); + conn.add(S4, CTX_2); + conn.add(S5, CTX_2); + } + backupFile(dataDir, "values.dat"); + } + + public static void overwriteByteInFile(File valuesFile, long pos, int newVal) throws IOException { + + // Use RandomAccessFile in "rw" mode to read and write to the file + try (RandomAccessFile raf = new RandomAccessFile(valuesFile, "rw")) { + // Get the length of the file + long fileLength = raf.length(); + + // Check if the position is within the file bounds + if (pos >= fileLength) { + throw new IOException( + "Attempt to write outside the existing 
file bounds: " + pos + " >= " + fileLength); + } + + // Move the file pointer to the requested byte position + raf.seek(pos); + + // Write the new byte value at the current position + raf.writeByte(newVal); + } + } + + public static void backupFile(File dataDir, String s) throws IOException { + File valuesFile = new File(dataDir, s); + File backupFile = new File(dataDir, s + ".bak"); + + if (!valuesFile.exists()) { + throw new IOException("values.dat does not exist and cannot be backed up."); + } + + // Copy values.dat to values.dat.bak + Files.copy(valuesFile.toPath(), backupFile.toPath(), StandardCopyOption.REPLACE_EXISTING); + } + + public static void restoreFile(File dataDir, String s) throws IOException { + File valuesFile = new File(dataDir, s); + File backupFile = new File(dataDir, s + ".bak"); + + if (!backupFile.exists()) { + throw new IOException("Backup file values.dat.bak does not exist."); + } + + // Copy values.dat.bak back to values.dat + Files.copy(backupFile.toPath(), valuesFile.toPath(), StandardCopyOption.REPLACE_EXISTING); + } + + @Test + public void testCorruptValuesDatFileNamespace() throws IOException { + repo.shutDown(); + + overwriteByteInFile(new File(dataDir, "values.dat"), 12, 0x0); + + repo.init(); + + List list = getStatements(); + assertEquals(6, list.size()); + } + + @Test + public void testCorruptValuesDatFileNamespaceDatatype() throws IOException { + repo.shutDown(); + + overwriteByteInFile(new File(dataDir, "values.dat"), 96, 0x0); + + repo.init(); + + List list = getStatements(); + assertEquals(6, list.size()); + } + + @Test + public void testCorruptValuesDatFileEmptyDataArrayError() throws IOException { + repo.shutDown(); + + overwriteByteInFile(new File(dataDir, "values.dat"), 173, 0x0); + + repo.init(); + + List list = getStatements(); + assertEquals(6, list.size()); + } + + @Test + public void testCorruptValuesDatFileInvalidTypeError() throws IOException { + repo.shutDown(); + + overwriteByteInFile(new File(dataDir, "values.dat"), 174, 
0x0); + + repo.init(); + + List list = getStatements(); + assertEquals(6, list.size()); + } + + @Test + public void testCorruptValuesDatFileEntireValuesDatFile() throws IOException { + for (int i = 4; i < 437; i++) { + logger.debug("Corrupting byte at position " + i); + repo.shutDown(); + restoreFile(dataDir, "values.dat"); + + overwriteByteInFile(new File(dataDir, "values.dat"), i, 0x0); + + repo.init(); + + List list = getStatements(); + assertEquals(6, list.size()); + + } + + } + + @NotNull + private List getStatements() { + List list = new ArrayList<>(); + + ValueStore.SOFT_FAIL_ON_CORRUPT_DATA = true; + + try (RepositoryConnection conn = repo.getConnection()) { + try (RepositoryResult statements = conn.getStatements(null, null, null, false)) { + while (statements.hasNext()) { + Statement next = statements.next(); + list.add(next); + logger.debug(next.toString()); + } + } + return list; + } finally { + ValueStore.SOFT_FAIL_ON_CORRUPT_DATA = false; + } + } + + @AfterEach + public void after() { + repo.shutDown(); + } +} diff --git a/site/content/documentation/programming/repository.md b/site/content/documentation/programming/repository.md index d1fa4f8324c..b8ebd6bdb82 100644 --- a/site/content/documentation/programming/repository.md +++ b/site/content/documentation/programming/repository.md @@ -98,6 +98,9 @@ import org.eclipse.rdf4j.sail.nativerdf.NativeStore; Repository repo = new SailRepository(new NativeStore()); ``` +In the unlikely event of corruption, the system property `org.eclipse.rdf4j.sail.nativerdf.softFailOnCorruptData` can be set to `true` to +make the NativeStore return CorruptValue/CorruptIRI/CorruptIRIOrBNode/CorruptLiteral objects for unreadable values instead of failing with an exception. 
+ ### Elasticsearch RDF Repository {{< tag " New in RDF4J 3.1" >}} From 0c58aac7ed1c37a07894a33722f5e48c9e7bf572 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ha=CC=8Avard=20Ottestad?= Date: Wed, 23 Oct 2024 15:51:18 +0200 Subject: [PATCH 05/12] GH-5148 corrupt data can be written as NQuads --- .../rdf4j/sail/nativerdf/ValueStore.java | 32 ++-- .../sail/nativerdf/model/CorruptIRI.java | 35 ++++- .../nativerdf/model/CorruptIRIOrBNode.java | 30 +++- .../sail/nativerdf/model/CorruptLiteral.java | 16 +- .../nativerdf/model/CorruptUnknownValue.java | 138 ++++++++++++++++++ .../NativeSailStoreCorruptionTest.java | 8 + 6 files changed, 244 insertions(+), 15 deletions(-) create mode 100644 core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/model/CorruptUnknownValue.java diff --git a/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/ValueStore.java b/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/ValueStore.java index c3f7a835c50..59dbd4ea3ff 100644 --- a/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/ValueStore.java +++ b/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/ValueStore.java @@ -36,6 +36,7 @@ import org.eclipse.rdf4j.sail.nativerdf.model.CorruptIRI; import org.eclipse.rdf4j.sail.nativerdf.model.CorruptIRIOrBNode; import org.eclipse.rdf4j.sail.nativerdf.model.CorruptLiteral; +import org.eclipse.rdf4j.sail.nativerdf.model.CorruptUnknownValue; import org.eclipse.rdf4j.sail.nativerdf.model.CorruptValue; import org.eclipse.rdf4j.sail.nativerdf.model.NativeBNode; import org.eclipse.rdf4j.sail.nativerdf.model.NativeIRI; @@ -225,10 +226,12 @@ public T getResource(int id) throws IOExcepti NativeValue resultValue = getValue(id); - if (!(resultValue instanceof Resource)) { + if (resultValue != null && !(resultValue instanceof Resource)) { if (SOFT_FAIL_ON_CORRUPT_DATA && resultValue instanceof CorruptValue) { return (T) new CorruptIRIOrBNode(revision, id, ((CorruptValue) 
resultValue).getData()); } + logger.warn( + "Possible corrupt data consider setting the system property org.eclipse.rdf4j.sail.nativerdf.softFailOnCorruptData to true"); } return (T) resultValue; @@ -245,10 +248,15 @@ public T getIRI(int id) throws IOException { NativeValue resultValue = getValue(id); - if (!(resultValue instanceof Resource)) { + if (resultValue != null && !(resultValue instanceof IRI)) { if (SOFT_FAIL_ON_CORRUPT_DATA && resultValue instanceof CorruptValue) { - return (T) new CorruptIRIOrBNode(revision, id, ((CorruptValue) resultValue).getData()); + if (resultValue instanceof CorruptIRI) { + return (T) resultValue; + } + return (T) new CorruptIRI(revision, id, null, ((CorruptValue) resultValue).getData()); } + logger.warn( + "Possible corrupt data consider setting the system property org.eclipse.rdf4j.sail.nativerdf.softFailOnCorruptData to true"); } return (T) resultValue; @@ -584,9 +592,10 @@ private NativeValue data2value(int id, byte[] data) throws IOException { if (data.length == 0) { if (SOFT_FAIL_ON_CORRUPT_DATA) { logger.error("Soft fail on corrupt data: Empty data array for value with id {}", id); - return new CorruptValue(revision, id, data); + return new CorruptUnknownValue(revision, id, data); } - throw new SailException("Empty data array for value with id " + id); + throw new SailException("Empty data array for value with id " + id + + " consider setting the system property org.eclipse.rdf4j.sail.nativerdf.softFailOnCorruptData to true"); } switch (data[0]) { case URI_VALUE: @@ -598,24 +607,29 @@ private NativeValue data2value(int id, byte[] data) throws IOException { default: if (SOFT_FAIL_ON_CORRUPT_DATA) { logger.error("Soft fail on corrupt data: Invalid type {} for value with id {}", data[0], id); - return new CorruptValue(revision, id, data); + return new CorruptUnknownValue(revision, id, data); } - throw new SailException("Invalid type " + data[0] + " for value with id " + id); + throw new SailException("Invalid type " + data[0] 
+ " for value with id " + id + + " consider setting the system property org.eclipse.rdf4j.sail.nativerdf.softFailOnCorruptData to true"); } } private T data2uri(int id, byte[] data) throws IOException { + String namespace = null; + try { int nsID = ByteArrayUtil.getInt(data, 1); - String namespace = getNamespace(nsID); + namespace = getNamespace(nsID); String localName = new String(data, 5, data.length - 5, StandardCharsets.UTF_8); return (T) new NativeIRI(revision, namespace, localName, id); } catch (Throwable e) { if (SOFT_FAIL_ON_CORRUPT_DATA && (e instanceof Exception || e instanceof AssertionError)) { - return (T) new CorruptIRI(revision, id, data); + return (T) new CorruptIRI(revision, id, namespace, data); } + logger.error( + "Possible corrupt data consider setting the system property org.eclipse.rdf4j.sail.nativerdf.softFailOnCorruptData to true"); throw e; } diff --git a/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/model/CorruptIRI.java b/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/model/CorruptIRI.java index 2a0f633a19f..819e13cae5a 100644 --- a/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/model/CorruptIRI.java +++ b/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/model/CorruptIRI.java @@ -11,9 +11,14 @@ package org.eclipse.rdf4j.sail.nativerdf.model; +import java.nio.charset.StandardCharsets; + +import org.apache.commons.codec.binary.Hex; import org.eclipse.rdf4j.model.IRI; import org.eclipse.rdf4j.sail.nativerdf.ValueStoreRevision; +import com.google.common.net.UrlEscapers; + /** * CorruptIRI is used when a NativeValue cannot be read from the ValueStore and if soft failure is enabled (see * ValueStore#softFailOnCorruptData). 
@@ -23,22 +28,48 @@ public class CorruptIRI extends CorruptValue implements IRI { private static final long serialVersionUID = -6995615243794525852L; + private final String namespace; - public CorruptIRI(ValueStoreRevision revision, int internalID, byte[] data) { + public CorruptIRI(ValueStoreRevision revision, int internalID, String namespace, byte[] data) { super(revision, internalID, data); + this.namespace = namespace; + } + + @Override + public String toString() { + return stringValue(); } public String stringValue() { + try { + return getNamespace() + ":" + getLocalName(); + } catch (Throwable ignored) { + } + return "CorruptIRI_with_ID_" + getInternalID(); } @Override public String getNamespace() { - return "CORRUPT"; + if (namespace != null && !namespace.isEmpty()) { + return namespace; + } + return "urn:CorruptIRI:"; } @Override public String getLocalName() { + byte[] data = getData(); + if (data != null && data.length < 1024) { + try { + String localName = new String(data, 5, data.length - 5, StandardCharsets.UTF_8); + return "CORRUPT_" + UrlEscapers.urlPathSegmentEscaper().escape(localName); + } catch (Throwable ignored) { + } + + return "CORRUPT_" + Hex.encodeHexString(data); + } + return "CORRUPT"; } diff --git a/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/model/CorruptIRIOrBNode.java b/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/model/CorruptIRIOrBNode.java index f06c6ad0164..740530a21dc 100644 --- a/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/model/CorruptIRIOrBNode.java +++ b/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/model/CorruptIRIOrBNode.java @@ -11,10 +11,15 @@ package org.eclipse.rdf4j.sail.nativerdf.model; +import java.nio.charset.StandardCharsets; + +import org.apache.commons.codec.binary.Hex; import org.eclipse.rdf4j.model.BNode; import org.eclipse.rdf4j.model.IRI; import org.eclipse.rdf4j.sail.nativerdf.ValueStoreRevision; +import 
com.google.common.net.UrlEscapers; + /** * CorruptIRIOrBNode is used when a NativeValue cannot be read from the ValueStore and if soft failure is enabled (see * ValueStore#softFailOnCorruptData). @@ -29,17 +34,38 @@ public CorruptIRIOrBNode(ValueStoreRevision revision, int internalID, byte[] dat super(revision, internalID, data); } + @Override + public String toString() { + return stringValue(); + } + public String stringValue() { - return "CorruptIRI_with_ID_" + getInternalID(); + try { + return getNamespace() + ":" + getLocalName(); + } catch (Throwable ignored) { + } + + return "CorruptIRIOrBNode_with_ID_" + getInternalID(); } @Override public String getNamespace() { - return "CORRUPT"; + return "urn:CorruptIRIOrBNode:"; } @Override public String getLocalName() { + byte[] data = getData(); + if (data != null && data.length < 1024) { + try { + String localName = new String(data, 5, data.length - 5, StandardCharsets.UTF_8); + return "CORRUPT_" + UrlEscapers.urlPathSegmentEscaper().escape(localName); + } catch (Throwable ignored) { + } + + return "CORRUPT_" + Hex.encodeHexString(data); + } + return "CORRUPT"; } diff --git a/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/model/CorruptLiteral.java b/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/model/CorruptLiteral.java index 00d3f8cc91e..0a8bef55523 100644 --- a/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/model/CorruptLiteral.java +++ b/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/model/CorruptLiteral.java @@ -13,6 +13,7 @@ import java.math.BigDecimal; import java.math.BigInteger; +import java.nio.charset.StandardCharsets; import java.util.Optional; import javax.xml.datatype.XMLGregorianCalendar; @@ -20,6 +21,7 @@ import org.eclipse.rdf4j.model.IRI; import org.eclipse.rdf4j.model.Literal; import org.eclipse.rdf4j.model.base.CoreDatatype; +import org.eclipse.rdf4j.model.util.Values; import 
org.eclipse.rdf4j.sail.nativerdf.ValueStoreRevision; /** @@ -32,6 +34,8 @@ public class CorruptLiteral extends CorruptValue implements Literal { private static final long serialVersionUID = -2510885288827542623L; + private static final IRI CORRUPT = Values.iri("urn:corrupt"); + public CorruptLiteral(ValueStoreRevision revision, int internalID, byte[] data) { super(revision, internalID, data); } @@ -42,7 +46,15 @@ public String stringValue() { @Override public String getLabel() { - return ""; + byte[] data = getData(); + try { + if (data != null && data.length < 1024) { + return "CorruptUnknownValue with ID " + getInternalID() + " with possible data: " + + new String(data, StandardCharsets.UTF_8); + } + } catch (Throwable ignored) { + } + return "CorruptUnknownValue_with_ID_" + getInternalID(); } @Override @@ -52,7 +64,7 @@ public Optional getLanguage() { @Override public IRI getDatatype() { - return null; + return CORRUPT; } @Override diff --git a/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/model/CorruptUnknownValue.java b/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/model/CorruptUnknownValue.java new file mode 100644 index 00000000000..9af37481088 --- /dev/null +++ b/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/model/CorruptUnknownValue.java @@ -0,0 +1,138 @@ +/******************************************************************************* + * Copyright (c) 2024 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. 
+ * + * SPDX-License-Identifier: BSD-3-Clause + ******************************************************************************/ + +package org.eclipse.rdf4j.sail.nativerdf.model; + +import java.math.BigDecimal; +import java.math.BigInteger; +import java.nio.charset.StandardCharsets; +import java.util.Optional; + +import javax.xml.datatype.XMLGregorianCalendar; + +import org.eclipse.rdf4j.model.IRI; +import org.eclipse.rdf4j.model.Literal; +import org.eclipse.rdf4j.model.base.CoreDatatype; +import org.eclipse.rdf4j.model.vocabulary.XSD; +import org.eclipse.rdf4j.sail.nativerdf.ValueStoreRevision; + +/** + * CorruptUnknownValue is used when a NativeValue cannot be read from the ValueStore and if soft failure is enabled (see + * ValueStore#softFailOnCorruptData). Since a type is needed, it is exposed as a Literal. + * + * @author Håvard M. Ottestad + */ +public class CorruptUnknownValue extends CorruptValue implements Literal { + + private static final long serialVersionUID = -6650510290226676279L; + + public CorruptUnknownValue(ValueStoreRevision revision, int internalID, byte[] data) { + super(revision, internalID, data); + } + + @Override + public String getLabel() { + byte[] data = getData(); + try { + if (data != null && data.length < 1024) { + return "CorruptUnknownValue with ID " + getInternalID() + " with possible data: " + + new String(data, StandardCharsets.UTF_8); + } + } catch (Throwable ignored) { + } + return "CorruptUnknownValue_with_ID_" + getInternalID(); + } + + @Override + public Optional getLanguage() { + return Optional.empty(); + } + + @Override + public IRI getDatatype() { + return XSD.STRING; + } + + @Override + public boolean booleanValue() { + return false; + } + + @Override + public byte byteValue() { + return 0; + } + + @Override + public short shortValue() { + return 0; + } + + @Override + public int intValue() { + return 0; + } + + @Override + public long longValue() { + return 0; + } + + @Override + public BigInteger integerValue() { + return null; + } + + @Override 
+ public BigDecimal decimalValue() { + return null; + } + + @Override + public float floatValue() { + return 0; + } + + @Override + public double doubleValue() { + return 0; + } + + @Override + public XMLGregorianCalendar calendarValue() { + return null; + } + + @Override + public CoreDatatype getCoreDatatype() { + return null; + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + + if (o instanceof CorruptUnknownValue && getInternalID() != NativeValue.UNKNOWN_ID) { + CorruptUnknownValue otherCorruptValue = (CorruptUnknownValue) o; + + if (otherCorruptValue.getInternalID() != NativeValue.UNKNOWN_ID + && getValueStoreRevision().equals(otherCorruptValue.getValueStoreRevision())) { + // CorruptValue is from the same revision of the same native store with both IDs set + return getInternalID() == otherCorruptValue.getInternalID(); + } + } + + return super.equals(o); + } + +} diff --git a/core/sail/nativerdf/src/test/java/org/eclipse/rdf4j/sail/nativerdf/NativeSailStoreCorruptionTest.java b/core/sail/nativerdf/src/test/java/org/eclipse/rdf4j/sail/nativerdf/NativeSailStoreCorruptionTest.java index ba8be3038c6..a85dd6076b1 100644 --- a/core/sail/nativerdf/src/test/java/org/eclipse/rdf4j/sail/nativerdf/NativeSailStoreCorruptionTest.java +++ b/core/sail/nativerdf/src/test/java/org/eclipse/rdf4j/sail/nativerdf/NativeSailStoreCorruptionTest.java @@ -16,6 +16,7 @@ import java.io.File; import java.io.IOException; import java.io.RandomAccessFile; +import java.io.StringWriter; import java.nio.file.Files; import java.nio.file.StandardCopyOption; import java.util.ArrayList; @@ -32,6 +33,9 @@ import org.eclipse.rdf4j.repository.RepositoryConnection; import org.eclipse.rdf4j.repository.RepositoryResult; import org.eclipse.rdf4j.repository.sail.SailRepository; +import org.eclipse.rdf4j.rio.RDFFormat; +import org.eclipse.rdf4j.rio.RDFWriter; +import org.eclipse.rdf4j.rio.Rio; import org.jetbrains.annotations.NotNull; import 
org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; @@ -204,6 +208,10 @@ private List getStatements() { ValueStore.SOFT_FAIL_ON_CORRUPT_DATA = true; try (RepositoryConnection conn = repo.getConnection()) { + StringWriter stringWriter = new StringWriter(); + RDFWriter writer = Rio.createWriter(RDFFormat.NQUADS, stringWriter); + conn.export(writer); + logger.debug(stringWriter.toString()); try (RepositoryResult statements = conn.getStatements(null, null, null, false)) { while (statements.hasNext()) { Statement next = statements.next(); From 92f4fe413cdca5ec632c91e4b6ab3962eac10d86 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ha=CC=8Avard=20Ottestad=20=28aider=29?= Date: Thu, 24 Oct 2024 09:36:10 +0200 Subject: [PATCH 06/12] GH-5148 add support for more files in the test cases and add a fix for when the corruption causes large amounts of data to be read --- .gitignore | 1 + .../sail/nativerdf/datastore/DataFile.java | 15 +++++ .../NativeSailStoreCorruptionTest.java | 55 ++++++++++++++++++- 3 files changed, 68 insertions(+), 3 deletions(-) diff --git a/.gitignore b/.gitignore index 75b520bf014..574d708ce85 100644 --- a/.gitignore +++ b/.gitignore @@ -52,3 +52,4 @@ org.eclipse.dash.licenses-1.0.2.jar e2e/node_modules e2e/playwright-report e2e/test-results +.aider* diff --git a/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/datastore/DataFile.java b/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/datastore/DataFile.java index d369c1649cf..bf3fb4c92dc 100644 --- a/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/datastore/DataFile.java +++ b/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/datastore/DataFile.java @@ -18,6 +18,9 @@ import java.util.NoSuchElementException; import org.eclipse.rdf4j.common.io.NioFile; +import org.eclipse.rdf4j.sail.nativerdf.ValueStore; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * Class supplying access to a data file. 
A data file stores data sequentially. Each entry starts with the entry's @@ -27,6 +30,8 @@ */ public class DataFile implements Closeable { + private static final Logger logger = LoggerFactory.getLogger(DataFile.class); + /*-----------* * Constants * *-----------*/ @@ -197,6 +202,16 @@ public byte[] getData(long offset) throws IOException { (data[2] << 8) & 0x0000ff00 | (data[3]) & 0x000000ff; + // If the data length is larger than 750MB, we are likely reading the wrong data. Probably data corruption. + if (dataLength > 750 * 1024 * 1024) { + if (ValueStore.SOFT_FAIL_ON_CORRUPT_DATA) { + logger.error( + "Data length is {}MB which is larger than 750MB. This is likely data corruption. Truncating length to 32 MB.", + dataLength / ((1024 * 1024))); + dataLength = 32 * 1024 * 1024; + } + } + // We have either managed to read enough data and can return the required subset of the data, or we have read // too little so we need to execute another read to get the correct data. if (dataLength <= data.length - 4) { diff --git a/core/sail/nativerdf/src/test/java/org/eclipse/rdf4j/sail/nativerdf/NativeSailStoreCorruptionTest.java b/core/sail/nativerdf/src/test/java/org/eclipse/rdf4j/sail/nativerdf/NativeSailStoreCorruptionTest.java index a85dd6076b1..bff87354394 100644 --- a/core/sail/nativerdf/src/test/java/org/eclipse/rdf4j/sail/nativerdf/NativeSailStoreCorruptionTest.java +++ b/core/sail/nativerdf/src/test/java/org/eclipse/rdf4j/sail/nativerdf/NativeSailStoreCorruptionTest.java @@ -88,6 +88,8 @@ public void before() throws IOException { conn.add(S5, CTX_2); } backupFile(dataDir, "values.dat"); + backupFile(dataDir, "values.id"); + backupFile(dataDir, "values.hash"); } public static void overwriteByteInFile(File valuesFile, long pos, int newVal) throws IOException { @@ -116,7 +118,7 @@ public static void backupFile(File dataDir, String s) throws IOException { File backupFile = new File(dataDir, s + ".bak"); if (!valuesFile.exists()) { - throw new IOException("values.dat does 
not exist and cannot be backed up."); + throw new IOException(s + " does not exist and cannot be backed up."); } // Copy values.dat to values.dat.bak @@ -128,7 +130,7 @@ public static void restoreFile(File dataDir, String s) throws IOException { File backupFile = new File(dataDir, s + ".bak"); if (!backupFile.exists()) { - throw new IOException("Backup file values.dat.bak does not exist."); + throw new IOException("Backup file " + s + ".bak does not exist."); } // Copy values.dat.bak back to values.dat @@ -196,9 +198,53 @@ public void testCorruptValuesDatFileEntireValuesDatFile() throws IOException { List list = getStatements(); assertEquals(6, list.size()); + } + } + + @Test + public void testCorruptLastByteOfValuesDatFile() throws IOException { + repo.shutDown(); + File valuesFile = new File(dataDir, "values.dat"); + long fileSize = valuesFile.length(); + + overwriteByteInFile(valuesFile, fileSize - 1, 0x0); + + repo.init(); + + List list = getStatements(); + assertEquals(6, list.size()); + } + + @Test + public void testCorruptValuesIdFile() throws IOException { + repo.shutDown(); + File valuesIdFile = new File(dataDir, "values.id"); + long fileSize = valuesIdFile.length(); + for (long i = 4; i < fileSize; i++) { + restoreFile(dataDir, "values.id"); + overwriteByteInFile(valuesIdFile, i, 0x0); + repo.init(); + List list = getStatements(); + assertEquals(6, list.size(), "Failed at byte position " + i); + repo.shutDown(); } + } + @Test + public void testCorruptValuesHashFile() throws IOException { + repo.shutDown(); + File valuesHashFile = new File(dataDir, "values.hash"); + long fileSize = valuesHashFile.length(); + + for (long i = 4; i < fileSize; i++) { + restoreFile(dataDir, "values.hash"); + overwriteByteInFile(valuesHashFile, i, 0x0); + repo.init(); + List list = getStatements(); + assertEquals(6, list.size(), "Failed at byte position " + i); + repo.shutDown(); + } } @NotNull @@ -226,7 +272,10 @@ private List getStatements() { } @AfterEach - public void 
after() { + public void after() throws IOException { repo.shutDown(); + restoreFile(dataDir, "values.hash"); + restoreFile(dataDir, "values.id"); + restoreFile(dataDir, "values.dat"); } } From b6215bb4f40cefec9d9ecf5af768eab6b57ee838 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ha=CC=8Avard=20Ottestad?= Date: Thu, 24 Oct 2024 13:17:01 +0200 Subject: [PATCH 07/12] GH-5148 improved soft fail on corruption for values.id and values.hash files. --- .../nativerdf/NativeStatementIterator.java | 16 ++++++ .../rdf4j/sail/nativerdf/NativeStore.java | 10 ++++ .../rdf4j/sail/nativerdf/ValueStore.java | 8 +-- .../sail/nativerdf/datastore/DataFile.java | 52 ++++++++++++------- .../NativeSailStoreCorruptionTest.java | 7 +-- 5 files changed, 62 insertions(+), 31 deletions(-) diff --git a/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/NativeStatementIterator.java b/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/NativeStatementIterator.java index b8776bdaf83..9c18795f328 100644 --- a/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/NativeStatementIterator.java +++ b/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/NativeStatementIterator.java @@ -10,6 +10,8 @@ *******************************************************************************/ package org.eclipse.rdf4j.sail.nativerdf; +import static org.eclipse.rdf4j.sail.nativerdf.NativeStore.SOFT_FAIL_ON_CORRUPT_DATA; + import java.io.IOException; import org.eclipse.rdf4j.common.io.ByteArrayUtil; @@ -20,6 +22,9 @@ import org.eclipse.rdf4j.model.Value; import org.eclipse.rdf4j.sail.SailException; import org.eclipse.rdf4j.sail.nativerdf.btree.RecordIterator; +import org.eclipse.rdf4j.sail.nativerdf.model.CorruptIRI; +import org.eclipse.rdf4j.sail.nativerdf.model.CorruptIRIOrBNode; +import org.eclipse.rdf4j.sail.nativerdf.model.CorruptUnknownValue; /** * A statement iterator that wraps a RecordIterator containing statement records and translates these records to @@ 
-74,6 +79,17 @@ public Statement getNextElement() throws SailException { if (contextID != 0) { context = valueStore.getResource(contextID); } + if (SOFT_FAIL_ON_CORRUPT_DATA) { + if (subj == null) { + subj = new CorruptIRIOrBNode(valueStore.getRevision(), subjID, null); + } + if (pred == null) { + pred = new CorruptIRI(valueStore.getRevision(), predID, null, null); + } + if (obj == null) { + obj = new CorruptUnknownValue(valueStore.getRevision(), objID, null); + } + } return valueStore.createStatement(subj, pred, obj, context); } catch (IOException e) { diff --git a/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/NativeStore.java b/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/NativeStore.java index c82bca9d4d9..36149498fee 100644 --- a/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/NativeStore.java +++ b/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/NativeStore.java @@ -24,6 +24,7 @@ import org.apache.commons.io.FileUtils; import org.eclipse.rdf4j.collection.factory.api.CollectionFactory; import org.eclipse.rdf4j.collection.factory.mapdb.MapDb3CollectionFactory; +import org.eclipse.rdf4j.common.annotation.InternalUseOnly; import org.eclipse.rdf4j.common.concurrent.locks.Lock; import org.eclipse.rdf4j.common.concurrent.locks.LockManager; import org.eclipse.rdf4j.common.io.MavenUtil; @@ -62,6 +63,15 @@ public class NativeStore extends AbstractNotifyingSail implements FederatedServi private static final String VERSION = MavenUtil.loadVersion("org.eclipse.rdf4j", "rdf4j-sail-nativerdf", "devel"); + /** + * Do not throw an exception when corrupt data is detected. Instead, try to return as much data as possible. + * + * Variable can be set through the system property org.eclipse.rdf4j.sail.nativerdf.softFailOnCorruptData. 
+ */ + @InternalUseOnly + public static boolean SOFT_FAIL_ON_CORRUPT_DATA = "true" + .equalsIgnoreCase(System.getProperty("org.eclipse.rdf4j.sail.nativerdf.softFailOnCorruptData"));; + private static final Cleaner REMOVE_STORES_USED_FOR_MEMORY_OVERFLOW = Cleaner.create(); /** diff --git a/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/ValueStore.java b/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/ValueStore.java index 59dbd4ea3ff..8ed2f979485 100644 --- a/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/ValueStore.java +++ b/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/ValueStore.java @@ -10,6 +10,8 @@ *******************************************************************************/ package org.eclipse.rdf4j.sail.nativerdf; +import static org.eclipse.rdf4j.sail.nativerdf.NativeStore.SOFT_FAIL_ON_CORRUPT_DATA; + import java.io.File; import java.io.IOException; import java.io.UnsupportedEncodingException; @@ -128,12 +130,6 @@ public class ValueStore extends SimpleValueFactory { */ private final ConcurrentCache namespaceIDCache; - /** - * Do not throw an exception in case a value cannot be loaded, e.g. due to a corrupt value store. 
- */ - public static boolean SOFT_FAIL_ON_CORRUPT_DATA = "true" - .equalsIgnoreCase(System.getProperty("org.eclipse.rdf4j.sail.nativerdf.softFailOnCorruptData"));; - /*--------------* * Constructors * *--------------*/ diff --git a/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/datastore/DataFile.java b/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/datastore/DataFile.java index bf3fb4c92dc..10d98ab3b41 100644 --- a/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/datastore/DataFile.java +++ b/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/datastore/DataFile.java @@ -10,6 +10,8 @@ *******************************************************************************/ package org.eclipse.rdf4j.sail.nativerdf.datastore; +import static org.eclipse.rdf4j.sail.nativerdf.NativeStore.SOFT_FAIL_ON_CORRUPT_DATA; + import java.io.Closeable; import java.io.File; import java.io.IOException; @@ -18,7 +20,6 @@ import java.util.NoSuchElementException; import org.eclipse.rdf4j.common.io.NioFile; -import org.eclipse.rdf4j.sail.nativerdf.ValueStore; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -203,8 +204,8 @@ public byte[] getData(long offset) throws IOException { (data[3]) & 0x000000ff; // If the data length is larger than 750MB, we are likely reading the wrong data. Probably data corruption. - if (dataLength > 750 * 1024 * 1024) { - if (ValueStore.SOFT_FAIL_ON_CORRUPT_DATA) { + if (dataLength > 128 * 1024 * 1024) { + if (SOFT_FAIL_ON_CORRUPT_DATA) { logger.error( "Data length is {}MB which is larger than 750MB. This is likely data corruption. Truncating length to 32 MB.", dataLength / ((1024 * 1024))); @@ -212,29 +213,40 @@ public byte[] getData(long offset) throws IOException { } } - // We have either managed to read enough data and can return the required subset of the data, or we have read - // too little so we need to execute another read to get the correct data. 
- if (dataLength <= data.length - 4) { + try { - // adjust the approximate average with 1 part actual length and 99 parts previous average up to a sensible - // max of 200 - dataLengthApproximateAverage = (int) (Math.min(200, - ((dataLengthApproximateAverage / 100.0) * 99) + (dataLength / 100.0))); + // We have either managed to read enough data and can return the required subset of the data, or we have + // read + // too little so we need to execute another read to get the correct data. + if (dataLength <= data.length - 4) { - return Arrays.copyOfRange(data, 4, dataLength + 4); + // adjust the approximate average with 1 part actual length and 99 parts previous average up to a + // sensible + // max of 200 + dataLengthApproximateAverage = (int) (Math.min(200, + ((dataLengthApproximateAverage / 100.0) * 99) + (dataLength / 100.0))); - } else { + return Arrays.copyOfRange(data, 4, dataLength + 4); - // adjust the approximate average, but favour the actual dataLength since dataLength predictions misses are - // costly - dataLengthApproximateAverage = Math.min(200, (dataLengthApproximateAverage + dataLength) / 2); + } else { - // we didn't read enough data so we need to execute a new read - data = new byte[dataLength]; - buf = ByteBuffer.wrap(data); - nioFile.read(buf, offset + 4L); + // adjust the approximate average, but favour the actual dataLength since dataLength predictions misses + // are costly + dataLengthApproximateAverage = Math.min(200, (dataLengthApproximateAverage + dataLength) / 2); - return data; + // we didn't read enough data so we need to execute a new read + data = new byte[dataLength]; + buf = ByteBuffer.wrap(data); + nioFile.read(buf, offset + 4L); + + return data; + } + } catch (OutOfMemoryError e) { + if (dataLength > 128 * 1024 * 1024) { + logger.error( + "Trying to read large amounts of data may be a sign of data corruption. 
Consider setting the system property org.eclipse.rdf4j.sail.nativerdf.softFailOnCorruptData to true"); + } + throw e; } } diff --git a/core/sail/nativerdf/src/test/java/org/eclipse/rdf4j/sail/nativerdf/NativeSailStoreCorruptionTest.java b/core/sail/nativerdf/src/test/java/org/eclipse/rdf4j/sail/nativerdf/NativeSailStoreCorruptionTest.java index bff87354394..14fe7e3279c 100644 --- a/core/sail/nativerdf/src/test/java/org/eclipse/rdf4j/sail/nativerdf/NativeSailStoreCorruptionTest.java +++ b/core/sail/nativerdf/src/test/java/org/eclipse/rdf4j/sail/nativerdf/NativeSailStoreCorruptionTest.java @@ -251,7 +251,7 @@ public void testCorruptValuesHashFile() throws IOException { private List getStatements() { List list = new ArrayList<>(); - ValueStore.SOFT_FAIL_ON_CORRUPT_DATA = true; + NativeStore.SOFT_FAIL_ON_CORRUPT_DATA = true; try (RepositoryConnection conn = repo.getConnection()) { StringWriter stringWriter = new StringWriter(); @@ -267,15 +267,12 @@ private List getStatements() { } return list; } finally { - ValueStore.SOFT_FAIL_ON_CORRUPT_DATA = false; + NativeStore.SOFT_FAIL_ON_CORRUPT_DATA = false; } } @AfterEach public void after() throws IOException { repo.shutDown(); - restoreFile(dataDir, "values.hash"); - restoreFile(dataDir, "values.id"); - restoreFile(dataDir, "values.dat"); } } From 007600343458a37f94e42a4bdfa0a07cb4785747 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ha=CC=8Avard=20Ottestad?= Date: Thu, 24 Oct 2024 15:03:26 +0200 Subject: [PATCH 08/12] GH-5148 improved handling of corrupt spoc/posc/... 
indexes --- .../nativerdf/NativeStatementIterator.java | 12 +- .../rdf4j/sail/nativerdf/TripleStore.java | 78 +++++++++- .../rdf4j/sail/nativerdf/btree/BTree.java | 13 ++ .../sail/nativerdf/btree/RangeIterator.java | 7 + .../NativeSailStoreCorruptionTest.java | 135 ++++++++++++++++-- .../documentation/programming/repository.md | 3 +- 6 files changed, 235 insertions(+), 13 deletions(-) diff --git a/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/NativeStatementIterator.java b/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/NativeStatementIterator.java index 9c18795f328..eee0d088ac0 100644 --- a/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/NativeStatementIterator.java +++ b/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/NativeStatementIterator.java @@ -25,6 +25,8 @@ import org.eclipse.rdf4j.sail.nativerdf.model.CorruptIRI; import org.eclipse.rdf4j.sail.nativerdf.model.CorruptIRIOrBNode; import org.eclipse.rdf4j.sail.nativerdf.model.CorruptUnknownValue; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * A statement iterator that wraps a RecordIterator containing statement records and translates these records to @@ -32,6 +34,8 @@ */ class NativeStatementIterator extends LookAheadIteration { + private static final Logger logger = LoggerFactory.getLogger(NativeStatementIterator.class); + /*-----------* * Variables * *-----------*/ @@ -59,7 +63,13 @@ public NativeStatementIterator(RecordIterator btreeIter, ValueStore valueStore) @Override public Statement getNextElement() throws SailException { try { - byte[] nextValue = btreeIter.next(); + byte[] nextValue; + try { + nextValue = btreeIter.next(); + } catch (AssertionError | Exception e) { + logger.error("Error while reading next value from btree iterator for {}", btreeIter.toString(), e); + throw e; + } if (nextValue == null) { return null; diff --git 
a/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/TripleStore.java b/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/TripleStore.java index c4bc52cd318..a546ee02da5 100644 --- a/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/TripleStore.java +++ b/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/TripleStore.java @@ -287,9 +287,71 @@ private Set parseIndexSpecList(String indexSpecStr) throws SailException } private void initIndexes(Set indexSpecs) throws IOException { + + HashSet invalidIndexes = new HashSet<>(); + for (String fieldSeq : indexSpecs) { logger.trace("Initializing index '{}'...", fieldSeq); - indexes.add(new TripleIndex(fieldSeq)); + try { + indexes.add(new TripleIndex(fieldSeq, false)); + } catch (Exception e) { + if (NativeStore.SOFT_FAIL_ON_CORRUPT_DATA) { + invalidIndexes.add(fieldSeq); + logger.warn("Ignoring index because it failed to initialize index '{}'", fieldSeq, e); + } else { + logger.error( + "Failed to initialize index '{}', consider setting org.eclipse.rdf4j.sail.nativerdf.softFailOnCorruptData to true.", + fieldSeq, e); + throw e; + } + + } + + } + + if (NativeStore.SOFT_FAIL_ON_CORRUPT_DATA) { + indexSpecs.removeAll(invalidIndexes); + } + + List emptyIndexes = new ArrayList<>(); + List nonEmptyIndexes = new ArrayList<>(); + + checkIfIndexesAreEmptyOrNot(nonEmptyIndexes, emptyIndexes); + + if (!emptyIndexes.isEmpty() && !nonEmptyIndexes.isEmpty()) { + if (NativeStore.SOFT_FAIL_ON_CORRUPT_DATA) { + indexes.removeAll(emptyIndexes); + } else { + for (TripleIndex index : emptyIndexes) { + throw new IOException("Index '" + new String(index.getFieldSeq()) + + "' is unexpectedly empty while other indexes are not. Consider setting the system property org.eclipse.rdf4j.sail.nativerdf.softFailOnCorruptData to true. 
Index file: " + + index.getBTree().getFile().getAbsolutePath()); + } + } + } + + } + + private void checkIfIndexesAreEmptyOrNot(List nonEmptyIndexes, List emptyIndexes) + throws IOException { + for (TripleIndex index : indexes) { + try (RecordIterator recordIterator = index.getBTree().iterateAll()) { + try { + byte[] next = recordIterator.next(); + if (next != null) { + next = recordIterator.next(); + if (next != null) { + nonEmptyIndexes.add(index); + } else { + emptyIndexes.add(index); + } + } else { + emptyIndexes.add(index); + } + } catch (Throwable ignored) { + emptyIndexes.add(index); + } + } } } @@ -355,7 +417,7 @@ private void reindex(Set currentIndexSpecs, Set newIndexSpecs) t for (String fieldSeq : addedIndexSpecs) { logger.debug("Initializing new index '{}'...", fieldSeq); - TripleIndex addedIndex = new TripleIndex(fieldSeq); + TripleIndex addedIndex = new TripleIndex(fieldSeq, true); BTree addedBTree = null; RecordIterator sourceIter = null; try { @@ -1122,7 +1184,17 @@ private class TripleIndex { private final BTree btree; - public TripleIndex(String fieldSeq) throws IOException { + public TripleIndex(String fieldSeq, boolean deleteExistingIndexFile) throws IOException { + if (deleteExistingIndexFile) { + File indexFile = new File(dir, getFilenamePrefix(fieldSeq) + ".dat"); + if (indexFile.exists()) { + indexFile.delete(); + } + File alloxFile = new File(dir, getFilenamePrefix(fieldSeq) + ".alloc"); + if (alloxFile.exists()) { + alloxFile.delete(); + } + } tripleComparator = new TripleComparator(fieldSeq); btree = new BTree(dir, getFilenamePrefix(fieldSeq), 2048, RECORD_LENGTH, tripleComparator, forceSync); } diff --git a/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/btree/BTree.java b/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/btree/BTree.java index 078f0f8601d..bcf91f51396 100644 --- a/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/btree/BTree.java +++ 
b/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/btree/BTree.java @@ -292,6 +292,12 @@ public BTree(File dataDir, String filenamePrefix, int blockSize, int valueSize, this.valueSize = buf.getInt(); this.rootNodeID = buf.getInt(); + if (rootNodeID == 0) { + if (nioFile.size() >= 1024) { + throw new IllegalStateException("Root node ID is 0 but file is not empty"); + } + } + if (Arrays.equals(MAGIC_NUMBER, magicNumber)) { if (version > FILE_FORMAT_VERSION) { throw new IOException("Unable to read BTree file " + file + "; it uses a newer file format"); @@ -1117,4 +1123,11 @@ public void print(PrintStream out) throws IOException { out.println("#values = " + valueCount); out.println("---end of BTree file---"); } + + @Override + public String toString() { + return "BTree{" + + "file=" + getFile() + + '}'; + } } diff --git a/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/btree/RangeIterator.java b/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/btree/RangeIterator.java index d7f1617b292..e6a6a3847e6 100644 --- a/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/btree/RangeIterator.java +++ b/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/btree/RangeIterator.java @@ -422,4 +422,11 @@ public boolean nodeMergedWith(Node sourceNode, Node targetNode, int mergeIdx) th return deregister; } + + @Override + public String toString() { + return "RangeIterator{" + + "tree=" + tree + + '}'; + } } diff --git a/core/sail/nativerdf/src/test/java/org/eclipse/rdf4j/sail/nativerdf/NativeSailStoreCorruptionTest.java b/core/sail/nativerdf/src/test/java/org/eclipse/rdf4j/sail/nativerdf/NativeSailStoreCorruptionTest.java index 14fe7e3279c..262383429c1 100644 --- a/core/sail/nativerdf/src/test/java/org/eclipse/rdf4j/sail/nativerdf/NativeSailStoreCorruptionTest.java +++ b/core/sail/nativerdf/src/test/java/org/eclipse/rdf4j/sail/nativerdf/NativeSailStoreCorruptionTest.java @@ -90,6 +90,15 @@ public void 
before() throws IOException { backupFile(dataDir, "values.dat"); backupFile(dataDir, "values.id"); backupFile(dataDir, "values.hash"); + backupFile(dataDir, "namespaces.dat"); + backupFile(dataDir, "contexts.dat"); + backupFile(dataDir, "triples-posc.alloc"); + backupFile(dataDir, "triples-posc.dat"); + backupFile(dataDir, "triples-spoc.alloc"); + backupFile(dataDir, "triples-spoc.dat"); + + NativeStore.SOFT_FAIL_ON_CORRUPT_DATA = true; + } public static void overwriteByteInFile(File valuesFile, long pos, int newVal) throws IOException { @@ -234,12 +243,64 @@ public void testCorruptValuesIdFile() throws IOException { @Test public void testCorruptValuesHashFile() throws IOException { repo.shutDown(); - File valuesHashFile = new File(dataDir, "values.hash"); - long fileSize = valuesHashFile.length(); + String file = "values.hash"; + File nativeStoreFile = new File(dataDir, file); + long fileSize = nativeStoreFile.length(); + + for (long i = 4; i < fileSize; i++) { + restoreFile(dataDir, file); + overwriteByteInFile(nativeStoreFile, i, 0x0); + repo.init(); + List list = getStatements(); + assertEquals(6, list.size(), "Failed at byte position " + i); + repo.shutDown(); + } + } + + @Test + public void testCorruptValuesNamespacesFile() throws IOException { + repo.shutDown(); + String file = "namespaces.dat"; + File nativeStoreFile = new File(dataDir, file); + long fileSize = nativeStoreFile.length(); + + for (long i = 4; i < fileSize; i++) { + restoreFile(dataDir, file); + overwriteByteInFile(nativeStoreFile, i, 0x0); + repo.init(); + List list = getStatements(); + assertEquals(6, list.size(), "Failed at byte position " + i); + repo.shutDown(); + } + } + + @Test + public void testCorruptValuesContextsFile() throws IOException { + repo.shutDown(); + String file = "contexts.dat"; + File nativeStoreFile = new File(dataDir, file); + long fileSize = nativeStoreFile.length(); + + for (long i = 4; i < fileSize; i++) { + restoreFile(dataDir, file); + 
overwriteByteInFile(nativeStoreFile, i, 0x0); + repo.init(); + List list = getStatements(); + assertEquals(6, list.size(), "Failed at byte position " + i); + repo.shutDown(); + } + } + + @Test + public void testCorruptValuesPoscAllocFile() throws IOException { + repo.shutDown(); + String file = "triples-posc.alloc"; + File nativeStoreFile = new File(dataDir, file); + long fileSize = nativeStoreFile.length(); for (long i = 4; i < fileSize; i++) { - restoreFile(dataDir, "values.hash"); - overwriteByteInFile(valuesHashFile, i, 0x0); + restoreFile(dataDir, file); + overwriteByteInFile(nativeStoreFile, i, 0x0); repo.init(); List list = getStatements(); assertEquals(6, list.size(), "Failed at byte position " + i); @@ -247,12 +308,71 @@ public void testCorruptValuesHashFile() throws IOException { } } + @Test + public void testCorruptValuesPoscDataFile() throws IOException { + repo.shutDown(); + String file = "triples-posc.dat"; + File nativeStoreFile = new File(dataDir, file); + long fileSize = nativeStoreFile.length(); + + for (long i = 4; i < fileSize; i++) { + NativeStore.SOFT_FAIL_ON_CORRUPT_DATA = true; + restoreFile(dataDir, file); + overwriteByteInFile(nativeStoreFile, i, 0x0); + repo.init(); + List list = getStatements(); + assertEquals(6, list.size(), "Failed at byte position " + i); + repo.shutDown(); + } + } + + @Test + public void testCorruptValuesSpocAllocFile() throws IOException { + repo.shutDown(); + String file = "triples-spoc.alloc"; + File nativeStoreFile = new File(dataDir, file); + long fileSize = nativeStoreFile.length(); + + for (long i = 4; i < fileSize; i++) { + restoreFile(dataDir, file); + overwriteByteInFile(nativeStoreFile, i, 0x0); + repo.init(); + List list = getStatements(); + assertEquals(6, list.size(), "Failed at byte position " + i); + repo.shutDown(); + } + } + + @Test + public void testCorruptValuesSpocDataFile() throws IOException { + repo.shutDown(); + String file = "triples-spoc.dat"; + File nativeStoreFile = new File(dataDir, 
file); + long fileSize = nativeStoreFile.length(); + + for (long i = 4; i < fileSize; i++) { + restoreFile(dataDir, file); + overwriteByteInFile(nativeStoreFile, i, 0x0); + repo.init(); + try { + List list = getStatements(); + assertEquals(6, list.size(), "Failed at byte position " + i); + } catch (Throwable ignored) { + repo.shutDown(); + nativeStoreFile.delete(); + repo.init(); + List list = getStatements(); + assertEquals(6, list.size(), "Failed at byte position " + i); + } + + repo.shutDown(); + } + } + @NotNull private List getStatements() { List list = new ArrayList<>(); - NativeStore.SOFT_FAIL_ON_CORRUPT_DATA = true; - try (RepositoryConnection conn = repo.getConnection()) { StringWriter stringWriter = new StringWriter(); RDFWriter writer = Rio.createWriter(RDFFormat.NQUADS, stringWriter); @@ -266,13 +386,12 @@ private List getStatements() { } } return list; - } finally { - NativeStore.SOFT_FAIL_ON_CORRUPT_DATA = false; } } @AfterEach public void after() throws IOException { + NativeStore.SOFT_FAIL_ON_CORRUPT_DATA = false; repo.shutDown(); } } diff --git a/site/content/documentation/programming/repository.md b/site/content/documentation/programming/repository.md index b8ebd6bdb82..7aab3115d0f 100644 --- a/site/content/documentation/programming/repository.md +++ b/site/content/documentation/programming/repository.md @@ -99,7 +99,8 @@ Repository repo = new SailRepository(new NativeStore()); ``` In the unlikely event of corruption the system property `org.eclipse.rdf4j.sail.nativerdf.softFailOnCorruptData` can be set to `true` to -allow the NativeStore to output CorruptValue/CorruptIRI/CorruptIRIOrBNode/CorruptLiteral objects. +allow the NativeStore to output CorruptValue/CorruptIRI/CorruptIRIOrBNode/CorruptLiteral objects. Take a backup of all data before setting +this property as it allows the NativeStore to delete corrupt indexes in an attempt to recreate them. Consider this feature experimental and use with caution. 
### Elasticsearch RDF Repository From 2e075d526f4115d39c21fad9539cd7cebd2617f7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ha=CC=8Avard=20Ottestad?= Date: Thu, 24 Oct 2024 15:07:52 +0200 Subject: [PATCH 09/12] GH-5148 cleanup naming and docs --- .../nativerdf/NativeStatementIterator.java | 4 +-- .../rdf4j/sail/nativerdf/NativeStore.java | 8 +++--- .../rdf4j/sail/nativerdf/TripleStore.java | 10 +++---- .../rdf4j/sail/nativerdf/ValueStore.java | 26 ++++++++++--------- .../sail/nativerdf/datastore/DataFile.java | 6 ++--- .../sail/nativerdf/model/CorruptIRI.java | 6 +++-- .../nativerdf/model/CorruptIRIOrBNode.java | 6 +++-- .../sail/nativerdf/model/CorruptLiteral.java | 6 +++-- .../nativerdf/model/CorruptUnknownValue.java | 6 +++-- .../sail/nativerdf/model/CorruptValue.java | 14 +++++----- .../NativeSailStoreCorruptionTest.java | 6 ++--- .../documentation/programming/repository.md | 2 +- 12 files changed, 57 insertions(+), 43 deletions(-) diff --git a/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/NativeStatementIterator.java b/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/NativeStatementIterator.java index eee0d088ac0..29b803e6cb5 100644 --- a/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/NativeStatementIterator.java +++ b/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/NativeStatementIterator.java @@ -10,7 +10,7 @@ *******************************************************************************/ package org.eclipse.rdf4j.sail.nativerdf; -import static org.eclipse.rdf4j.sail.nativerdf.NativeStore.SOFT_FAIL_ON_CORRUPT_DATA; +import static org.eclipse.rdf4j.sail.nativerdf.NativeStore.SOFT_FAIL_ON_CORRUPT_DATA_AND_REPAIR_INDEXES; import java.io.IOException; @@ -89,7 +89,7 @@ public Statement getNextElement() throws SailException { if (contextID != 0) { context = valueStore.getResource(contextID); } - if (SOFT_FAIL_ON_CORRUPT_DATA) { + if (SOFT_FAIL_ON_CORRUPT_DATA_AND_REPAIR_INDEXES) { if (subj 
== null) { subj = new CorruptIRIOrBNode(valueStore.getRevision(), subjID, null); } diff --git a/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/NativeStore.java b/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/NativeStore.java index 36149498fee..e156083b8ef 100644 --- a/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/NativeStore.java +++ b/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/NativeStore.java @@ -66,11 +66,13 @@ public class NativeStore extends AbstractNotifyingSail implements FederatedServi /** * Do not throw an exception when corrupt data is detected. Instead, try to return as much data as possible. * - * Variable can be set through the system property org.eclipse.rdf4j.sail.nativerdf.softFailOnCorruptData. + * Variable can be set through the system property + * org.eclipse.rdf4j.sail.nativerdf.softFailOnCorruptDataAndRepairIndexes. */ @InternalUseOnly - public static boolean SOFT_FAIL_ON_CORRUPT_DATA = "true" - .equalsIgnoreCase(System.getProperty("org.eclipse.rdf4j.sail.nativerdf.softFailOnCorruptData"));; + public static boolean SOFT_FAIL_ON_CORRUPT_DATA_AND_REPAIR_INDEXES = "true" + .equalsIgnoreCase( + System.getProperty("org.eclipse.rdf4j.sail.nativerdf.softFailOnCorruptDataAndRepairIndexes"));; private static final Cleaner REMOVE_STORES_USED_FOR_MEMORY_OVERFLOW = Cleaner.create(); diff --git a/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/TripleStore.java b/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/TripleStore.java index a546ee02da5..3c060af663d 100644 --- a/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/TripleStore.java +++ b/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/TripleStore.java @@ -295,12 +295,12 @@ private void initIndexes(Set indexSpecs) throws IOException { try { indexes.add(new TripleIndex(fieldSeq, false)); } catch (Exception e) { - if 
(NativeStore.SOFT_FAIL_ON_CORRUPT_DATA) { + if (NativeStore.SOFT_FAIL_ON_CORRUPT_DATA_AND_REPAIR_INDEXES) { invalidIndexes.add(fieldSeq); logger.warn("Ignoring index because it failed to initialize index '{}'", fieldSeq, e); } else { logger.error( - "Failed to initialize index '{}', consider setting org.eclipse.rdf4j.sail.nativerdf.softFailOnCorruptData to true.", + "Failed to initialize index '{}', consider setting org.eclipse.rdf4j.sail.nativerdf.softFailOnCorruptDataAndRepairIndexes to true.", fieldSeq, e); throw e; } @@ -309,7 +309,7 @@ private void initIndexes(Set indexSpecs) throws IOException { } - if (NativeStore.SOFT_FAIL_ON_CORRUPT_DATA) { + if (NativeStore.SOFT_FAIL_ON_CORRUPT_DATA_AND_REPAIR_INDEXES) { indexSpecs.removeAll(invalidIndexes); } @@ -319,12 +319,12 @@ private void initIndexes(Set indexSpecs) throws IOException { checkIfIndexesAreEmptyOrNot(nonEmptyIndexes, emptyIndexes); if (!emptyIndexes.isEmpty() && !nonEmptyIndexes.isEmpty()) { - if (NativeStore.SOFT_FAIL_ON_CORRUPT_DATA) { + if (NativeStore.SOFT_FAIL_ON_CORRUPT_DATA_AND_REPAIR_INDEXES) { indexes.removeAll(emptyIndexes); } else { for (TripleIndex index : emptyIndexes) { throw new IOException("Index '" + new String(index.getFieldSeq()) - + "' is unexpectedly empty while other indexes are not. Consider setting the system property org.eclipse.rdf4j.sail.nativerdf.softFailOnCorruptData to true. Index file: " + + "' is unexpectedly empty while other indexes are not. Consider setting the system property org.eclipse.rdf4j.sail.nativerdf.softFailOnCorruptDataAndRepairIndexes to true. 
Index file: " + index.getBTree().getFile().getAbsolutePath()); } } diff --git a/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/ValueStore.java b/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/ValueStore.java index 8ed2f979485..6a8f1260b7c 100644 --- a/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/ValueStore.java +++ b/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/ValueStore.java @@ -10,7 +10,7 @@ *******************************************************************************/ package org.eclipse.rdf4j.sail.nativerdf; -import static org.eclipse.rdf4j.sail.nativerdf.NativeStore.SOFT_FAIL_ON_CORRUPT_DATA; +import static org.eclipse.rdf4j.sail.nativerdf.NativeStore.SOFT_FAIL_ON_CORRUPT_DATA_AND_REPAIR_INDEXES; import java.io.File; import java.io.IOException; @@ -223,11 +223,11 @@ public T getResource(int id) throws IOExcepti NativeValue resultValue = getValue(id); if (resultValue != null && !(resultValue instanceof Resource)) { - if (SOFT_FAIL_ON_CORRUPT_DATA && resultValue instanceof CorruptValue) { + if (SOFT_FAIL_ON_CORRUPT_DATA_AND_REPAIR_INDEXES && resultValue instanceof CorruptValue) { return (T) new CorruptIRIOrBNode(revision, id, ((CorruptValue) resultValue).getData()); } logger.warn( - "Possible corrupt data consider setting the system property org.eclipse.rdf4j.sail.nativerdf.softFailOnCorruptData to true"); + "Possible corrupt data consider setting the system property org.eclipse.rdf4j.sail.nativerdf.softFailOnCorruptDataAndRepairIndexes to true"); } return (T) resultValue; @@ -245,14 +245,14 @@ public T getIRI(int id) throws IOException { NativeValue resultValue = getValue(id); if (resultValue != null && !(resultValue instanceof IRI)) { - if (SOFT_FAIL_ON_CORRUPT_DATA && resultValue instanceof CorruptValue) { + if (SOFT_FAIL_ON_CORRUPT_DATA_AND_REPAIR_INDEXES && resultValue instanceof CorruptValue) { if (resultValue instanceof CorruptIRI) { return (T) resultValue; } return (T) 
new CorruptIRI(revision, id, null, ((CorruptValue) resultValue).getData()); } logger.warn( - "Possible corrupt data consider setting the system property org.eclipse.rdf4j.sail.nativerdf.softFailOnCorruptData to true"); + "Possible corrupt data consider setting the system property org.eclipse.rdf4j.sail.nativerdf.softFailOnCorruptDataAndRepairIndexes to true"); } return (T) resultValue; @@ -586,12 +586,12 @@ private boolean isNamespaceData(byte[] data) { private NativeValue data2value(int id, byte[] data) throws IOException { if (data.length == 0) { - if (SOFT_FAIL_ON_CORRUPT_DATA) { + if (SOFT_FAIL_ON_CORRUPT_DATA_AND_REPAIR_INDEXES) { logger.error("Soft fail on corrupt data: Empty data array for value with id {}", id); return new CorruptUnknownValue(revision, id, data); } throw new SailException("Empty data array for value with id " + id - + " consider setting the system property org.eclipse.rdf4j.sail.nativerdf.softFailOnCorruptData to true"); + + " consider setting the system property org.eclipse.rdf4j.sail.nativerdf.softFailOnCorruptDataAndRepairIndexes to true"); } switch (data[0]) { case URI_VALUE: @@ -601,12 +601,12 @@ private NativeValue data2value(int id, byte[] data) throws IOException { case LITERAL_VALUE: return data2literal(id, data); default: - if (SOFT_FAIL_ON_CORRUPT_DATA) { + if (SOFT_FAIL_ON_CORRUPT_DATA_AND_REPAIR_INDEXES) { logger.error("Soft fail on corrupt data: Invalid type {} for value with id {}", data[0], id); return new CorruptUnknownValue(revision, id, data); } throw new SailException("Invalid type " + data[0] + " for value with id " + id - + " consider setting the system property org.eclipse.rdf4j.sail.nativerdf.softFailOnCorruptData to true"); + + " consider setting the system property org.eclipse.rdf4j.sail.nativerdf.softFailOnCorruptDataAndRepairIndexes to true"); } } @@ -621,11 +621,12 @@ private T data2uri(int id, byte[] data) throws IOE return (T) new NativeIRI(revision, namespace, localName, id); } catch (Throwable e) { - if 
(SOFT_FAIL_ON_CORRUPT_DATA && (e instanceof Exception || e instanceof AssertionError)) { + if (SOFT_FAIL_ON_CORRUPT_DATA_AND_REPAIR_INDEXES + && (e instanceof Exception || e instanceof AssertionError)) { return (T) new CorruptIRI(revision, id, namespace, data); } logger.error( - "Possible corrupt data consider setting the system property org.eclipse.rdf4j.sail.nativerdf.softFailOnCorruptData to true"); + "Possible corrupt data consider setting the system property org.eclipse.rdf4j.sail.nativerdf.softFailOnCorruptDataAndRepairIndexes to true"); throw e; } @@ -663,7 +664,8 @@ private T data2literal(int id, byte[] data) th return (T) new NativeLiteral(revision, label, CoreDatatype.XSD.STRING, id); } } catch (Throwable e) { - if (SOFT_FAIL_ON_CORRUPT_DATA && (e instanceof Exception || e instanceof AssertionError)) { + if (SOFT_FAIL_ON_CORRUPT_DATA_AND_REPAIR_INDEXES + && (e instanceof Exception || e instanceof AssertionError)) { return (T) new CorruptLiteral(revision, id, data); } throw e; diff --git a/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/datastore/DataFile.java b/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/datastore/DataFile.java index 10d98ab3b41..ab22e310569 100644 --- a/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/datastore/DataFile.java +++ b/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/datastore/DataFile.java @@ -10,7 +10,7 @@ *******************************************************************************/ package org.eclipse.rdf4j.sail.nativerdf.datastore; -import static org.eclipse.rdf4j.sail.nativerdf.NativeStore.SOFT_FAIL_ON_CORRUPT_DATA; +import static org.eclipse.rdf4j.sail.nativerdf.NativeStore.SOFT_FAIL_ON_CORRUPT_DATA_AND_REPAIR_INDEXES; import java.io.Closeable; import java.io.File; @@ -205,7 +205,7 @@ public byte[] getData(long offset) throws IOException { // If the data length is larger than 750MB, we are likely reading the wrong data. 
Probably data corruption. if (dataLength > 128 * 1024 * 1024) { - if (SOFT_FAIL_ON_CORRUPT_DATA) { + if (SOFT_FAIL_ON_CORRUPT_DATA_AND_REPAIR_INDEXES) { logger.error( "Data length is {}MB which is larger than 750MB. This is likely data corruption. Truncating length to 32 MB.", dataLength / ((1024 * 1024))); @@ -244,7 +244,7 @@ public byte[] getData(long offset) throws IOException { } catch (OutOfMemoryError e) { if (dataLength > 128 * 1024 * 1024) { logger.error( - "Trying to read large amounts of data may be a sign of data corruption. Consider setting the system property org.eclipse.rdf4j.sail.nativerdf.softFailOnCorruptData to true"); + "Trying to read large amounts of data may be a sign of data corruption. Consider setting the system property org.eclipse.rdf4j.sail.nativerdf.softFailOnCorruptDataAndRepairIndexes to true"); } throw e; } diff --git a/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/model/CorruptIRI.java b/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/model/CorruptIRI.java index 819e13cae5a..71816d29e4d 100644 --- a/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/model/CorruptIRI.java +++ b/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/model/CorruptIRI.java @@ -15,13 +15,15 @@ import org.apache.commons.codec.binary.Hex; import org.eclipse.rdf4j.model.IRI; +import org.eclipse.rdf4j.sail.nativerdf.NativeStore; import org.eclipse.rdf4j.sail.nativerdf.ValueStoreRevision; import com.google.common.net.UrlEscapers; /** - * CorruptIRI is used when a NativeValue cannot be read from the ValueStore and if soft failure is enabled (see - * ValueStore#softFailOnCorruptData). + * CorruptIRI is used when a NativeValue cannot be read from the ValueStore and if soft failure is enabled + * + * @see NativeStore#SOFT_FAIL_ON_CORRUPT_DATA_AND_REPAIR_INDEXES . * * @author Håvard M. 
Ottestad */ diff --git a/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/model/CorruptIRIOrBNode.java b/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/model/CorruptIRIOrBNode.java index 740530a21dc..83cdb9e6658 100644 --- a/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/model/CorruptIRIOrBNode.java +++ b/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/model/CorruptIRIOrBNode.java @@ -16,13 +16,15 @@ import org.apache.commons.codec.binary.Hex; import org.eclipse.rdf4j.model.BNode; import org.eclipse.rdf4j.model.IRI; +import org.eclipse.rdf4j.sail.nativerdf.NativeStore; import org.eclipse.rdf4j.sail.nativerdf.ValueStoreRevision; import com.google.common.net.UrlEscapers; /** - * CorruptIRIOrBNode is used when a NativeValue cannot be read from the ValueStore and if soft failure is enabled (see - * ValueStore#softFailOnCorruptData). + * CorruptIRIOrBNode is used when a NativeValue cannot be read from the ValueStore and if soft failure is enabled + * + * @see NativeStore#SOFT_FAIL_ON_CORRUPT_DATA_AND_REPAIR_INDEXES . * * @author Håvard M. 
Ottestad */ diff --git a/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/model/CorruptLiteral.java b/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/model/CorruptLiteral.java index 0a8bef55523..eb6b2587c25 100644 --- a/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/model/CorruptLiteral.java +++ b/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/model/CorruptLiteral.java @@ -22,11 +22,13 @@ import org.eclipse.rdf4j.model.Literal; import org.eclipse.rdf4j.model.base.CoreDatatype; import org.eclipse.rdf4j.model.util.Values; +import org.eclipse.rdf4j.sail.nativerdf.NativeStore; import org.eclipse.rdf4j.sail.nativerdf.ValueStoreRevision; /** - * CorruptLiteral is used when a NativeValue cannot be read from the ValueStore and if soft failure is enabled (see - * ValueStore#softFailOnCorruptData). + * CorruptLiteral is used when a NativeValue cannot be read from the ValueStore and if soft failure is enabled + * + * @see NativeStore#SOFT_FAIL_ON_CORRUPT_DATA_AND_REPAIR_INDEXES . * * @author Håvard M. Ottestad */ diff --git a/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/model/CorruptUnknownValue.java b/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/model/CorruptUnknownValue.java index 9af37481088..ea200b55fa5 100644 --- a/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/model/CorruptUnknownValue.java +++ b/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/model/CorruptUnknownValue.java @@ -22,11 +22,13 @@ import org.eclipse.rdf4j.model.Literal; import org.eclipse.rdf4j.model.base.CoreDatatype; import org.eclipse.rdf4j.model.vocabulary.XSD; +import org.eclipse.rdf4j.sail.nativerdf.NativeStore; import org.eclipse.rdf4j.sail.nativerdf.ValueStoreRevision; /** - * CorruptUnknownValue is used when a NativeValue cannot be read from the ValueStore and if soft failure is enabled (see - * ValueStore#softFailOnCorruptData). 
Since a type is needed + * CorruptUnknownValue is used when a NativeValue cannot be read from the ValueStore and if soft failure is enabled + * + * @see NativeStore#SOFT_FAIL_ON_CORRUPT_DATA_AND_REPAIR_INDEXES . * * @author Håvard M. Ottestad */ diff --git a/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/model/CorruptValue.java b/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/model/CorruptValue.java index 53a3576e443..94028b5c579 100644 --- a/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/model/CorruptValue.java +++ b/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/model/CorruptValue.java @@ -11,15 +11,17 @@ package org.eclipse.rdf4j.sail.nativerdf.model; +import org.eclipse.rdf4j.sail.nativerdf.NativeStore; import org.eclipse.rdf4j.sail.nativerdf.ValueStoreRevision; /** - * CorruptValue is used when a NativeValue cannot be read from the ValueStore and if soft failure is enabled (see - * ValueStore#softFailOnCorruptData). - *

- * There is no method isCorruptValue() as it would exist for a "regular" implementation of NativeValue. Since - * CorruptValue is only to be used in exceptional situations, the recommended way of checking for it is using - * "instanceof". + * CorruptValue is used when a NativeValue cannot be read from the ValueStore and if soft failure is enabled + * + * @see NativeStore#SOFT_FAIL_ON_CORRUPT_DATA_AND_REPAIR_INDEXES . + *

+ * There is no method isCorruptValue() as it would exist for a "regular" implementation of NativeValue. Since + * CorruptValue is only to be used in exceptional situations, the recommended way of checking for it is using + * "instanceof". * * @author Hannes Ebner */ diff --git a/core/sail/nativerdf/src/test/java/org/eclipse/rdf4j/sail/nativerdf/NativeSailStoreCorruptionTest.java b/core/sail/nativerdf/src/test/java/org/eclipse/rdf4j/sail/nativerdf/NativeSailStoreCorruptionTest.java index 262383429c1..12119ceb50b 100644 --- a/core/sail/nativerdf/src/test/java/org/eclipse/rdf4j/sail/nativerdf/NativeSailStoreCorruptionTest.java +++ b/core/sail/nativerdf/src/test/java/org/eclipse/rdf4j/sail/nativerdf/NativeSailStoreCorruptionTest.java @@ -97,7 +97,7 @@ public void before() throws IOException { backupFile(dataDir, "triples-spoc.alloc"); backupFile(dataDir, "triples-spoc.dat"); - NativeStore.SOFT_FAIL_ON_CORRUPT_DATA = true; + NativeStore.SOFT_FAIL_ON_CORRUPT_DATA_AND_REPAIR_INDEXES = true; } @@ -316,7 +316,7 @@ public void testCorruptValuesPoscDataFile() throws IOException { long fileSize = nativeStoreFile.length(); for (long i = 4; i < fileSize; i++) { - NativeStore.SOFT_FAIL_ON_CORRUPT_DATA = true; + NativeStore.SOFT_FAIL_ON_CORRUPT_DATA_AND_REPAIR_INDEXES = true; restoreFile(dataDir, file); overwriteByteInFile(nativeStoreFile, i, 0x0); repo.init(); @@ -391,7 +391,7 @@ private List getStatements() { @AfterEach public void after() throws IOException { - NativeStore.SOFT_FAIL_ON_CORRUPT_DATA = false; + NativeStore.SOFT_FAIL_ON_CORRUPT_DATA_AND_REPAIR_INDEXES = false; repo.shutDown(); } } diff --git a/site/content/documentation/programming/repository.md b/site/content/documentation/programming/repository.md index 7aab3115d0f..691a839c74e 100644 --- a/site/content/documentation/programming/repository.md +++ b/site/content/documentation/programming/repository.md @@ -98,7 +98,7 @@ import org.eclipse.rdf4j.sail.nativerdf.NativeStore; Repository repo = new SailRepository(new 
NativeStore()); ``` -In the unlikely event of corruption the system property `org.eclipse.rdf4j.sail.nativerdf.softFailOnCorruptData` can be set to `true` to +In the unlikely event of corruption the system property `org.eclipse.rdf4j.sail.nativerdf.softFailOnCorruptDataAndRepairIndexes` can be set to `true` to allow the NativeStore to output CorruptValue/CorruptIRI/CorruptIRIOrBNode/CorruptLiteral objects. Take a backup of all data before setting this property as it allows the NativeStore to delete corrupt indexes in an attempt to recreate them. Consider this feature experimental and use with caution. From 590e658a7658a305521ec96c22db270deff8c24e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ha=CC=8Avard=20Ottestad?= Date: Fri, 25 Oct 2024 11:39:58 +0200 Subject: [PATCH 10/12] GH-5148 better detection of non-empty b-tree --- .../org/eclipse/rdf4j/sail/nativerdf/btree/BTree.java | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/btree/BTree.java b/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/btree/BTree.java index bcf91f51396..b297d940ea3 100644 --- a/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/btree/BTree.java +++ b/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/btree/BTree.java @@ -25,6 +25,7 @@ import org.eclipse.rdf4j.common.io.ByteArrayUtil; import org.eclipse.rdf4j.common.io.NioFile; import org.eclipse.rdf4j.sail.SailException; +import org.eclipse.rdf4j.sail.nativerdf.NativeStore; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -292,9 +293,10 @@ public BTree(File dataDir, String filenamePrefix, int blockSize, int valueSize, this.valueSize = buf.getInt(); this.rootNodeID = buf.getInt(); - if (rootNodeID == 0) { - if (nioFile.size() >= 1024) { - throw new IllegalStateException("Root node ID is 0 but file is not empty"); + if (rootNodeID == 0 && NativeStore.SOFT_FAIL_ON_CORRUPT_DATA_AND_REPAIR_INDEXES) { 
+ if (nioFile.size() > blockSize) { + throw new SailException("Root node ID is 0 but file is not empty. Btree may be corrupt. File: " + + file.getAbsolutePath()); } } From 8fbb4ee806a7a8f2b3326cda5c8e17ba0b5d3655 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ha=CC=8Avard=20Ottestad?= Date: Sun, 10 Nov 2024 12:53:12 +0100 Subject: [PATCH 11/12] GH-5148 improved error message --- .../java/org/eclipse/rdf4j/sail/nativerdf/ValueStore.java | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/ValueStore.java b/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/ValueStore.java index 6a8f1260b7c..37787ac610c 100644 --- a/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/ValueStore.java +++ b/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/ValueStore.java @@ -227,7 +227,7 @@ public T getResource(int id) throws IOExcepti return (T) new CorruptIRIOrBNode(revision, id, ((CorruptValue) resultValue).getData()); } logger.warn( - "Possible corrupt data consider setting the system property org.eclipse.rdf4j.sail.nativerdf.softFailOnCorruptDataAndRepairIndexes to true"); + "NativeStore is possibly corrupt. To attempt to repair or retrieve the data, read the documentation on http://rdf4j.org about the system property org.eclipse.rdf4j.sail.nativerdf.softFailOnCorruptDataAndRepairIndexes"); } return (T) resultValue; @@ -252,7 +252,7 @@ public T getIRI(int id) throws IOException { return (T) new CorruptIRI(revision, id, null, ((CorruptValue) resultValue).getData()); } logger.warn( - "Possible corrupt data consider setting the system property org.eclipse.rdf4j.sail.nativerdf.softFailOnCorruptDataAndRepairIndexes to true"); + "NativeStore is possibly corrupt. 
To attempt to repair or retrieve the data, read the documentation on http://rdf4j.org about the system property org.eclipse.rdf4j.sail.nativerdf.softFailOnCorruptDataAndRepairIndexes"); } return (T) resultValue; @@ -625,8 +625,8 @@ private T data2uri(int id, byte[] data) throws IOE && (e instanceof Exception || e instanceof AssertionError)) { return (T) new CorruptIRI(revision, id, namespace, data); } - logger.error( - "Possible corrupt data consider setting the system property org.eclipse.rdf4j.sail.nativerdf.softFailOnCorruptDataAndRepairIndexes to true"); + logger.warn( + "NativeStore is possibly corrupt. To attempt to repair or retrieve the data, read the documentation on http://rdf4j.org about the system property org.eclipse.rdf4j.sail.nativerdf.softFailOnCorruptDataAndRepairIndexes"); throw e; } From c47fe2b79b01f9566a2abdbc1548f9afbc258b1f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ha=CC=8Avard=20Ottestad?= Date: Sun, 10 Nov 2024 12:59:12 +0100 Subject: [PATCH 12/12] improve javadocs and make some tests more robust --- .../rdf4j/sail/nativerdf/datastore/DataFile.java | 3 ++- .../eclipse/rdf4j/sail/shacl/MultithreadedTest.java | 7 +++++++ .../rdf4j/testsuite/sail/SailConcurrencyTest.java | 11 ++++++++++- 3 files changed, 19 insertions(+), 2 deletions(-) diff --git a/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/datastore/DataFile.java b/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/datastore/DataFile.java index ab22e310569..73e9c349de7 100644 --- a/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/datastore/DataFile.java +++ b/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/datastore/DataFile.java @@ -203,7 +203,8 @@ public byte[] getData(long offset) throws IOException { (data[2] << 8) & 0x0000ff00 | (data[3]) & 0x000000ff; - // If the data length is larger than 750MB, we are likely reading the wrong data. Probably data corruption. 
+ // If the data length is larger than 750MB, we are likely reading the wrong data. Probably data corruption. The + // limit of 750MB was chosen based on results from experimenting in the NativeSailStoreCorruptionTest class. if (dataLength > 128 * 1024 * 1024) { if (SOFT_FAIL_ON_CORRUPT_DATA_AND_REPAIR_INDEXES) { logger.error( diff --git a/core/sail/shacl/src/test/java/org/eclipse/rdf4j/sail/shacl/MultithreadedTest.java b/core/sail/shacl/src/test/java/org/eclipse/rdf4j/sail/shacl/MultithreadedTest.java index 89aea92cac9..be87b248131 100644 --- a/core/sail/shacl/src/test/java/org/eclipse/rdf4j/sail/shacl/MultithreadedTest.java +++ b/core/sail/shacl/src/test/java/org/eclipse/rdf4j/sail/shacl/MultithreadedTest.java @@ -46,6 +46,7 @@ import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.Timeout; import org.junit.jupiter.api.parallel.Isolated; import org.slf4j.LoggerFactory; @@ -67,6 +68,7 @@ public static void afterAll() { } @Test + @Timeout(value = 30, unit = TimeUnit.MINUTES) public void testDataAndShapes() { System.out.println("testDataAndShapes"); @@ -339,6 +341,7 @@ private void remove(String turtle, IRI graph) { } @Test + @Timeout(value = 30, unit = TimeUnit.MINUTES) public void testLotsOfValidationFailuresSnapshot() throws IOException { System.out.println("testLotsOfValidationFailuresSnapshot"); ShaclSail sail = new ShaclSail(getBaseSail()); @@ -354,6 +357,7 @@ public void testLotsOfValidationFailuresSnapshot() throws IOException { } @Test + @Timeout(value = 30, unit = TimeUnit.MINUTES) public void testLotsOfValidationFailuresSerializableValidation() throws IOException { System.out.println("testLotsOfValidationFailuresSerializableValidation"); Logger root = (Logger) LoggerFactory.getLogger(ShaclSailBaseConfiguration.class.getName()); @@ -371,6 +375,7 @@ public void testLotsOfValidationFailuresSerializableValidation() throws IOExcept } @Test + @Timeout(value = 30, unit = 
TimeUnit.MINUTES) public void testLotsOfValidationFailuresSerializable() throws IOException { System.out.println("testLotsOfValidationFailuresSerializable"); @@ -389,6 +394,7 @@ public void testLotsOfValidationFailuresSerializable() throws IOException { } @Test + @Timeout(value = 30, unit = TimeUnit.MINUTES) public void testLotsOfValidationFailuresReadCommitted() throws IOException { System.out.println("testLotsOfValidationFailuresReadCommitted"); ShaclSail sail = new ShaclSail(getBaseSail()); @@ -403,6 +409,7 @@ public void testLotsOfValidationFailuresReadCommitted() throws IOException { } @Test + @Timeout(value = 30, unit = TimeUnit.MINUTES) public void testLotsOfValidationFailuresReadUncommitted() throws IOException { System.out.println("testLotsOfValidationFailuresReadUncommitted"); ShaclSail sail = new ShaclSail(getBaseSail()); diff --git a/testsuites/sail/src/main/java/org/eclipse/rdf4j/testsuite/sail/SailConcurrencyTest.java b/testsuites/sail/src/main/java/org/eclipse/rdf4j/testsuite/sail/SailConcurrencyTest.java index d47793cc3ec..4ee406b0e7f 100644 --- a/testsuites/sail/src/main/java/org/eclipse/rdf4j/testsuite/sail/SailConcurrencyTest.java +++ b/testsuites/sail/src/main/java/org/eclipse/rdf4j/testsuite/sail/SailConcurrencyTest.java @@ -37,6 +37,7 @@ import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.Timeout; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -155,6 +156,7 @@ public int getSize() { * @see https://github.com/eclipse/rdf4j/issues/693 */ @Test + @Timeout(value = 30, unit = TimeUnit.MINUTES) public void testConcurrentAddLargeTxn() throws Exception { logger.info("executing two large concurrent transactions"); final CountDownLatch runnersDone = new CountDownLatch(2); @@ -196,6 +198,7 @@ public void testConcurrentAddLargeTxn() throws Exception { * one of the transactions rolls back at the end. 
*/ @Test + @Timeout(value = 30, unit = TimeUnit.MINUTES) public void testConcurrentAddLargeTxnRollback() throws Exception { logger.info("executing two large concurrent transactions"); final CountDownLatch runnersDone = new CountDownLatch(2); @@ -237,6 +240,7 @@ public void testConcurrentAddLargeTxnRollback() throws Exception { } @Test + @Timeout(value = 30, unit = TimeUnit.MINUTES) @Disabled("This test takes a long time and accomplishes little extra") public void testGetContextIDs() throws Exception { // Create one thread which writes statements to the repository, on a @@ -314,6 +318,7 @@ public void testGetContextIDs() throws Exception { } @Test + @Timeout(value = 30, unit = TimeUnit.MINUTES) public void testConcurrentConnectionsShutdown() throws InterruptedException { if (store instanceof AbstractSail) { ((AbstractSail) store).setConnectionTimeOut(200); @@ -356,8 +361,9 @@ public void testConcurrentConnectionsShutdown() throws InterruptedException { } -// @Disabled + // @Disabled @Test + @Timeout(value = 30, unit = TimeUnit.MINUTES) public void testSerialThreads() throws InterruptedException { if (store instanceof AbstractSail) { ((AbstractSail) store).setConnectionTimeOut(200); @@ -438,6 +444,7 @@ public void testSerialThreads() throws InterruptedException { } @Test + @Timeout(value = 30, unit = TimeUnit.MINUTES) public void testConcurrentConnectionsShutdownReadCommitted() throws InterruptedException { if (store instanceof AbstractSail) { ((AbstractSail) store).setConnectionTimeOut(200); @@ -493,6 +500,7 @@ public void testConcurrentConnectionsShutdownReadCommitted() throws InterruptedE } @Test + @Timeout(value = 30, unit = TimeUnit.MINUTES) public void testConcurrentConnectionsShutdownAndClose() throws InterruptedException { if (store instanceof AbstractSail) { ((AbstractSail) store).setConnectionTimeOut(200); @@ -568,6 +576,7 @@ public void testConcurrentConnectionsShutdownAndClose() throws InterruptedExcept } @Test + @Timeout(value = 30, unit = 
TimeUnit.MINUTES) public void testConcurrentConnectionsShutdownAndCloseRollback() throws InterruptedException { if (store instanceof AbstractSail) { ((AbstractSail) store).setConnectionTimeOut(200);