Skip to content

Commit

Permalink
GH-5148 cleanup naming and docs
Browse files Browse the repository at this point in the history
  • Loading branch information
hmottestad committed Oct 24, 2024
1 parent 0076003 commit 2e075d5
Show file tree
Hide file tree
Showing 12 changed files with 57 additions and 43 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
*******************************************************************************/
package org.eclipse.rdf4j.sail.nativerdf;

import static org.eclipse.rdf4j.sail.nativerdf.NativeStore.SOFT_FAIL_ON_CORRUPT_DATA;
import static org.eclipse.rdf4j.sail.nativerdf.NativeStore.SOFT_FAIL_ON_CORRUPT_DATA_AND_REPAIR_INDEXES;

import java.io.IOException;

Expand Down Expand Up @@ -89,7 +89,7 @@ public Statement getNextElement() throws SailException {
if (contextID != 0) {
context = valueStore.getResource(contextID);
}
if (SOFT_FAIL_ON_CORRUPT_DATA) {
if (SOFT_FAIL_ON_CORRUPT_DATA_AND_REPAIR_INDEXES) {
if (subj == null) {
subj = new CorruptIRIOrBNode(valueStore.getRevision(), subjID, null);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -66,11 +66,13 @@ public class NativeStore extends AbstractNotifyingSail implements FederatedServi
/**
* Do not throw an exception when corrupt data is detected. Instead, try to return as much data as possible.
*
* Variable can be set through the system property org.eclipse.rdf4j.sail.nativerdf.softFailOnCorruptData.
* Variable can be set through the system property
* org.eclipse.rdf4j.sail.nativerdf.softFailOnCorruptDataAndRepairIndexes.
*/
@InternalUseOnly
public static boolean SOFT_FAIL_ON_CORRUPT_DATA = "true"
.equalsIgnoreCase(System.getProperty("org.eclipse.rdf4j.sail.nativerdf.softFailOnCorruptData"));;
public static boolean SOFT_FAIL_ON_CORRUPT_DATA_AND_REPAIR_INDEXES = "true"
.equalsIgnoreCase(
System.getProperty("org.eclipse.rdf4j.sail.nativerdf.softFailOnCorruptDataAndRepairIndexes"));;

private static final Cleaner REMOVE_STORES_USED_FOR_MEMORY_OVERFLOW = Cleaner.create();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -295,12 +295,12 @@ private void initIndexes(Set<String> indexSpecs) throws IOException {
try {
indexes.add(new TripleIndex(fieldSeq, false));
} catch (Exception e) {
if (NativeStore.SOFT_FAIL_ON_CORRUPT_DATA) {
if (NativeStore.SOFT_FAIL_ON_CORRUPT_DATA_AND_REPAIR_INDEXES) {
invalidIndexes.add(fieldSeq);
logger.warn("Ignoring index because it failed to initialize index '{}'", fieldSeq, e);
} else {
logger.error(
"Failed to initialize index '{}', consider setting org.eclipse.rdf4j.sail.nativerdf.softFailOnCorruptData to true.",
"Failed to initialize index '{}', consider setting org.eclipse.rdf4j.sail.nativerdf.softFailOnCorruptDataAndRepairIndexes to true.",
fieldSeq, e);
throw e;
}
Expand All @@ -309,7 +309,7 @@ private void initIndexes(Set<String> indexSpecs) throws IOException {

}

if (NativeStore.SOFT_FAIL_ON_CORRUPT_DATA) {
if (NativeStore.SOFT_FAIL_ON_CORRUPT_DATA_AND_REPAIR_INDEXES) {
indexSpecs.removeAll(invalidIndexes);
}

Expand All @@ -319,12 +319,12 @@ private void initIndexes(Set<String> indexSpecs) throws IOException {
checkIfIndexesAreEmptyOrNot(nonEmptyIndexes, emptyIndexes);

if (!emptyIndexes.isEmpty() && !nonEmptyIndexes.isEmpty()) {
if (NativeStore.SOFT_FAIL_ON_CORRUPT_DATA) {
if (NativeStore.SOFT_FAIL_ON_CORRUPT_DATA_AND_REPAIR_INDEXES) {
indexes.removeAll(emptyIndexes);
} else {
for (TripleIndex index : emptyIndexes) {
throw new IOException("Index '" + new String(index.getFieldSeq())
+ "' is unexpectedly empty while other indexes are not. Consider setting the system property org.eclipse.rdf4j.sail.nativerdf.softFailOnCorruptData to true. Index file: "
+ "' is unexpectedly empty while other indexes are not. Consider setting the system property org.eclipse.rdf4j.sail.nativerdf.softFailOnCorruptDataAndRepairIndexes to true. Index file: "
+ index.getBTree().getFile().getAbsolutePath());
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
*******************************************************************************/
package org.eclipse.rdf4j.sail.nativerdf;

import static org.eclipse.rdf4j.sail.nativerdf.NativeStore.SOFT_FAIL_ON_CORRUPT_DATA;
import static org.eclipse.rdf4j.sail.nativerdf.NativeStore.SOFT_FAIL_ON_CORRUPT_DATA_AND_REPAIR_INDEXES;

import java.io.File;
import java.io.IOException;
Expand Down Expand Up @@ -223,11 +223,11 @@ public <T extends NativeValue & Resource> T getResource(int id) throws IOExcepti
NativeValue resultValue = getValue(id);

if (resultValue != null && !(resultValue instanceof Resource)) {
if (SOFT_FAIL_ON_CORRUPT_DATA && resultValue instanceof CorruptValue) {
if (SOFT_FAIL_ON_CORRUPT_DATA_AND_REPAIR_INDEXES && resultValue instanceof CorruptValue) {
return (T) new CorruptIRIOrBNode(revision, id, ((CorruptValue) resultValue).getData());
}
logger.warn(
"Possible corrupt data consider setting the system property org.eclipse.rdf4j.sail.nativerdf.softFailOnCorruptData to true");
"Possible corrupt data consider setting the system property org.eclipse.rdf4j.sail.nativerdf.softFailOnCorruptDataAndRepairIndexes to true");
}

return (T) resultValue;
Expand All @@ -245,14 +245,14 @@ public <T extends NativeValue & IRI> T getIRI(int id) throws IOException {
NativeValue resultValue = getValue(id);

if (resultValue != null && !(resultValue instanceof IRI)) {
if (SOFT_FAIL_ON_CORRUPT_DATA && resultValue instanceof CorruptValue) {
if (SOFT_FAIL_ON_CORRUPT_DATA_AND_REPAIR_INDEXES && resultValue instanceof CorruptValue) {
if (resultValue instanceof CorruptIRI) {
return (T) resultValue;
}
return (T) new CorruptIRI(revision, id, null, ((CorruptValue) resultValue).getData());
}
logger.warn(
"Possible corrupt data consider setting the system property org.eclipse.rdf4j.sail.nativerdf.softFailOnCorruptData to true");
"Possible corrupt data consider setting the system property org.eclipse.rdf4j.sail.nativerdf.softFailOnCorruptDataAndRepairIndexes to true");
}

return (T) resultValue;
Expand Down Expand Up @@ -586,12 +586,12 @@ private boolean isNamespaceData(byte[] data) {

private NativeValue data2value(int id, byte[] data) throws IOException {
if (data.length == 0) {
if (SOFT_FAIL_ON_CORRUPT_DATA) {
if (SOFT_FAIL_ON_CORRUPT_DATA_AND_REPAIR_INDEXES) {
logger.error("Soft fail on corrupt data: Empty data array for value with id {}", id);
return new CorruptUnknownValue(revision, id, data);
}
throw new SailException("Empty data array for value with id " + id
+ " consider setting the system property org.eclipse.rdf4j.sail.nativerdf.softFailOnCorruptData to true");
+ " consider setting the system property org.eclipse.rdf4j.sail.nativerdf.softFailOnCorruptDataAndRepairIndexes to true");
}
switch (data[0]) {
case URI_VALUE:
Expand All @@ -601,12 +601,12 @@ private NativeValue data2value(int id, byte[] data) throws IOException {
case LITERAL_VALUE:
return data2literal(id, data);
default:
if (SOFT_FAIL_ON_CORRUPT_DATA) {
if (SOFT_FAIL_ON_CORRUPT_DATA_AND_REPAIR_INDEXES) {
logger.error("Soft fail on corrupt data: Invalid type {} for value with id {}", data[0], id);
return new CorruptUnknownValue(revision, id, data);
}
throw new SailException("Invalid type " + data[0] + " for value with id " + id
+ " consider setting the system property org.eclipse.rdf4j.sail.nativerdf.softFailOnCorruptData to true");
+ " consider setting the system property org.eclipse.rdf4j.sail.nativerdf.softFailOnCorruptDataAndRepairIndexes to true");
}
}

Expand All @@ -621,11 +621,12 @@ private <T extends IRI & NativeValue> T data2uri(int id, byte[] data) throws IOE

return (T) new NativeIRI(revision, namespace, localName, id);
} catch (Throwable e) {
if (SOFT_FAIL_ON_CORRUPT_DATA && (e instanceof Exception || e instanceof AssertionError)) {
if (SOFT_FAIL_ON_CORRUPT_DATA_AND_REPAIR_INDEXES
&& (e instanceof Exception || e instanceof AssertionError)) {
return (T) new CorruptIRI(revision, id, namespace, data);
}
logger.error(
"Possible corrupt data consider setting the system property org.eclipse.rdf4j.sail.nativerdf.softFailOnCorruptData to true");
"Possible corrupt data consider setting the system property org.eclipse.rdf4j.sail.nativerdf.softFailOnCorruptDataAndRepairIndexes to true");
throw e;
}

Expand Down Expand Up @@ -663,7 +664,8 @@ private <T extends NativeValue & Literal> T data2literal(int id, byte[] data) th
return (T) new NativeLiteral(revision, label, CoreDatatype.XSD.STRING, id);
}
} catch (Throwable e) {
if (SOFT_FAIL_ON_CORRUPT_DATA && (e instanceof Exception || e instanceof AssertionError)) {
if (SOFT_FAIL_ON_CORRUPT_DATA_AND_REPAIR_INDEXES
&& (e instanceof Exception || e instanceof AssertionError)) {
return (T) new CorruptLiteral(revision, id, data);
}
throw e;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
*******************************************************************************/
package org.eclipse.rdf4j.sail.nativerdf.datastore;

import static org.eclipse.rdf4j.sail.nativerdf.NativeStore.SOFT_FAIL_ON_CORRUPT_DATA;
import static org.eclipse.rdf4j.sail.nativerdf.NativeStore.SOFT_FAIL_ON_CORRUPT_DATA_AND_REPAIR_INDEXES;

import java.io.Closeable;
import java.io.File;
Expand Down Expand Up @@ -205,7 +205,7 @@ public byte[] getData(long offset) throws IOException {

// If the data length is larger than 128MB, we are likely reading the wrong data. Probably data corruption.
if (dataLength > 128 * 1024 * 1024) {
if (SOFT_FAIL_ON_CORRUPT_DATA) {
if (SOFT_FAIL_ON_CORRUPT_DATA_AND_REPAIR_INDEXES) {
logger.error(
"Data length is {}MB which is larger than 750MB. This is likely data corruption. Truncating length to 32 MB.",
dataLength / ((1024 * 1024)));
Expand Down Expand Up @@ -244,7 +244,7 @@ public byte[] getData(long offset) throws IOException {
} catch (OutOfMemoryError e) {
if (dataLength > 128 * 1024 * 1024) {
logger.error(
"Trying to read large amounts of data may be a sign of data corruption. Consider setting the system property org.eclipse.rdf4j.sail.nativerdf.softFailOnCorruptData to true");
"Trying to read large amounts of data may be a sign of data corruption. Consider setting the system property org.eclipse.rdf4j.sail.nativerdf.softFailOnCorruptDataAndRepairIndexes to true");
}
throw e;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,15 @@

import org.apache.commons.codec.binary.Hex;
import org.eclipse.rdf4j.model.IRI;
import org.eclipse.rdf4j.sail.nativerdf.NativeStore;
import org.eclipse.rdf4j.sail.nativerdf.ValueStoreRevision;

import com.google.common.net.UrlEscapers;

/**
* CorruptIRI is used when a NativeValue cannot be read from the ValueStore and if soft failure is enabled (see
* ValueStore#softFailOnCorruptData).
* CorruptIRI is used when a NativeValue cannot be read from the ValueStore and if soft failure is enabled.
*
* @see NativeStore#SOFT_FAIL_ON_CORRUPT_DATA_AND_REPAIR_INDEXES
*
* @author Håvard M. Ottestad
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,15 @@
import org.apache.commons.codec.binary.Hex;
import org.eclipse.rdf4j.model.BNode;
import org.eclipse.rdf4j.model.IRI;
import org.eclipse.rdf4j.sail.nativerdf.NativeStore;
import org.eclipse.rdf4j.sail.nativerdf.ValueStoreRevision;

import com.google.common.net.UrlEscapers;

/**
* CorruptIRIOrBNode is used when a NativeValue cannot be read from the ValueStore and if soft failure is enabled (see
* ValueStore#softFailOnCorruptData).
* CorruptIRIOrBNode is used when a NativeValue cannot be read from the ValueStore and if soft failure is enabled.
*
* @see NativeStore#SOFT_FAIL_ON_CORRUPT_DATA_AND_REPAIR_INDEXES
*
* @author Håvard M. Ottestad
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,13 @@
import org.eclipse.rdf4j.model.Literal;
import org.eclipse.rdf4j.model.base.CoreDatatype;
import org.eclipse.rdf4j.model.util.Values;
import org.eclipse.rdf4j.sail.nativerdf.NativeStore;
import org.eclipse.rdf4j.sail.nativerdf.ValueStoreRevision;

/**
* CorruptLiteral is used when a NativeValue cannot be read from the ValueStore and if soft failure is enabled (see
* ValueStore#softFailOnCorruptData).
* CorruptLiteral is used when a NativeValue cannot be read from the ValueStore and if soft failure is enabled.
*
* @see NativeStore#SOFT_FAIL_ON_CORRUPT_DATA_AND_REPAIR_INDEXES
*
* @author Håvard M. Ottestad
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,13 @@
import org.eclipse.rdf4j.model.Literal;
import org.eclipse.rdf4j.model.base.CoreDatatype;
import org.eclipse.rdf4j.model.vocabulary.XSD;
import org.eclipse.rdf4j.sail.nativerdf.NativeStore;
import org.eclipse.rdf4j.sail.nativerdf.ValueStoreRevision;

/**
* CorruptUnknownValue is used when a NativeValue cannot be read from the ValueStore and if soft failure is enabled (see
* ValueStore#softFailOnCorruptData). Since a type is needed
* CorruptUnknownValue is used when a NativeValue cannot be read from the ValueStore and if soft failure is enabled.
*
* @see NativeStore#SOFT_FAIL_ON_CORRUPT_DATA_AND_REPAIR_INDEXES
*
* @author Håvard M. Ottestad
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,15 +11,17 @@

package org.eclipse.rdf4j.sail.nativerdf.model;

import org.eclipse.rdf4j.sail.nativerdf.NativeStore;
import org.eclipse.rdf4j.sail.nativerdf.ValueStoreRevision;

/**
* CorruptValue is used when a NativeValue cannot be read from the ValueStore and if soft failure is enabled (see
* ValueStore#softFailOnCorruptData).
* <p>
* There is no method isCorruptValue() as it would exist for a "regular" implementation of NativeValue. Since
* CorruptValue is only to be used in exceptional situations, the recommended way of checking for it is using
* "instanceof".
* CorruptValue is used when a NativeValue cannot be read from the ValueStore and if soft failure is enabled (see
* {@link NativeStore#SOFT_FAIL_ON_CORRUPT_DATA_AND_REPAIR_INDEXES}).
* <p>
* There is no method isCorruptValue() as it would exist for a "regular" implementation of NativeValue. Since
* CorruptValue is only to be used in exceptional situations, the recommended way of checking for it is using
* "instanceof".
*
* @author Hannes Ebner
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ public void before() throws IOException {
backupFile(dataDir, "triples-spoc.alloc");
backupFile(dataDir, "triples-spoc.dat");

NativeStore.SOFT_FAIL_ON_CORRUPT_DATA = true;
NativeStore.SOFT_FAIL_ON_CORRUPT_DATA_AND_REPAIR_INDEXES = true;

}

Expand Down Expand Up @@ -316,7 +316,7 @@ public void testCorruptValuesPoscDataFile() throws IOException {
long fileSize = nativeStoreFile.length();

for (long i = 4; i < fileSize; i++) {
NativeStore.SOFT_FAIL_ON_CORRUPT_DATA = true;
NativeStore.SOFT_FAIL_ON_CORRUPT_DATA_AND_REPAIR_INDEXES = true;
restoreFile(dataDir, file);
overwriteByteInFile(nativeStoreFile, i, 0x0);
repo.init();
Expand Down Expand Up @@ -391,7 +391,7 @@ private List<Statement> getStatements() {

@AfterEach
public void after() throws IOException {
NativeStore.SOFT_FAIL_ON_CORRUPT_DATA = false;
NativeStore.SOFT_FAIL_ON_CORRUPT_DATA_AND_REPAIR_INDEXES = false;
repo.shutDown();
}
}
2 changes: 1 addition & 1 deletion site/content/documentation/programming/repository.md
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ import org.eclipse.rdf4j.sail.nativerdf.NativeStore;
Repository repo = new SailRepository(new NativeStore());
```

In the unlikely event of corruption the system property `org.eclipse.rdf4j.sail.nativerdf.softFailOnCorruptData` can be set to `true` to
In the unlikely event of corruption, the system property `org.eclipse.rdf4j.sail.nativerdf.softFailOnCorruptDataAndRepairIndexes` can be set to `true` to
allow the NativeStore to output CorruptValue/CorruptIRI/CorruptIRIOrBNode/CorruptLiteral objects. Take a backup of all data before setting
this property as it allows the NativeStore to delete corrupt indexes in an attempt to recreate them. Consider this feature experimental and use with caution.

Expand Down

0 comments on commit 2e075d5

Please sign in to comment.