diff --git a/.gitignore b/.gitignore index 75b520bf014..574d708ce85 100644 --- a/.gitignore +++ b/.gitignore @@ -52,3 +52,4 @@ org.eclipse.dash.licenses-1.0.2.jar e2e/node_modules e2e/playwright-report e2e/test-results +.aider* diff --git a/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/datastore/DataFile.java b/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/datastore/DataFile.java index d369c1649cf..bf3fb4c92dc 100644 --- a/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/datastore/DataFile.java +++ b/core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/datastore/DataFile.java @@ -18,6 +18,9 @@ import java.util.NoSuchElementException; import org.eclipse.rdf4j.common.io.NioFile; +import org.eclipse.rdf4j.sail.nativerdf.ValueStore; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * Class supplying access to a data file. A data file stores data sequentially. Each entry starts with the entry's @@ -27,6 +30,8 @@ */ public class DataFile implements Closeable { + private static final Logger logger = LoggerFactory.getLogger(DataFile.class); + /*-----------* * Constants * *-----------*/ @@ -197,6 +202,16 @@ public byte[] getData(long offset) throws IOException { (data[2] << 8) & 0x0000ff00 | (data[3]) & 0x000000ff; + // If the data length is larger than 750MB, we are likely reading the wrong data. Probably data corruption. + if (dataLength > 750 * 1024 * 1024) { + if (ValueStore.SOFT_FAIL_ON_CORRUPT_DATA) { + logger.error( + "Data length is {}MB which is larger than 750MB. This is likely data corruption. Truncating length to 32 MB.", + dataLength / ((1024 * 1024))); + dataLength = 32 * 1024 * 1024; + } + } + // We have either managed to read enough data and can return the required subset of the data, or we have read // too little so we need to execute another read to get the correct data. if (dataLength <= data.length - 4) { diff --git a/core/sail/nativerdf/src/test/java/org/eclipse/rdf4j/sail/nativerdf/NativeSailStoreCorruptionTest.java b/core/sail/nativerdf/src/test/java/org/eclipse/rdf4j/sail/nativerdf/NativeSailStoreCorruptionTest.java index a85dd6076b1..bff87354394 100644 --- a/core/sail/nativerdf/src/test/java/org/eclipse/rdf4j/sail/nativerdf/NativeSailStoreCorruptionTest.java +++ b/core/sail/nativerdf/src/test/java/org/eclipse/rdf4j/sail/nativerdf/NativeSailStoreCorruptionTest.java @@ -88,6 +88,8 @@ public void before() throws IOException { conn.add(S5, CTX_2); } backupFile(dataDir, "values.dat"); + backupFile(dataDir, "values.id"); + backupFile(dataDir, "values.hash"); } public static void overwriteByteInFile(File valuesFile, long pos, int newVal) throws IOException { @@ -116,7 +118,7 @@ public static void backupFile(File dataDir, String s) throws IOException { File backupFile = new File(dataDir, s + ".bak"); if (!valuesFile.exists()) { - throw new IOException("values.dat does not exist and cannot be backed up."); + throw new IOException(s + " does not exist and cannot be backed up."); } // Copy values.dat to values.dat.bak @@ -128,7 +130,7 @@ public static void restoreFile(File dataDir, String s) throws IOException { File backupFile = new File(dataDir, s + ".bak"); if (!backupFile.exists()) { - throw new IOException("Backup file values.dat.bak does not exist."); + throw new IOException("Backup file " + s + ".bak does not exist."); } // Copy values.dat.bak back to values.dat @@ -196,9 +198,53 @@ public void testCorruptValuesDatFileEntireValuesDatFile() throws IOException { List<Statement> list = getStatements(); assertEquals(6, list.size()); + } + } + + @Test + public void testCorruptLastByteOfValuesDatFile() throws IOException { + repo.shutDown(); + File valuesFile = new File(dataDir, "values.dat"); + long fileSize = valuesFile.length(); + + overwriteByteInFile(valuesFile, fileSize - 1, 0x0); + + repo.init(); + + List<Statement> list = getStatements(); + assertEquals(6, list.size()); + } + + @Test + public void testCorruptValuesIdFile() throws IOException { + repo.shutDown(); + File valuesIdFile = new File(dataDir, "values.id"); + long fileSize = valuesIdFile.length(); + for (long i = 4; i < fileSize; i++) { + restoreFile(dataDir, "values.id"); + overwriteByteInFile(valuesIdFile, i, 0x0); + repo.init(); + List<Statement> list = getStatements(); + assertEquals(6, list.size(), "Failed at byte position " + i); + repo.shutDown(); } + } + @Test + public void testCorruptValuesHashFile() throws IOException { + repo.shutDown(); + File valuesHashFile = new File(dataDir, "values.hash"); + long fileSize = valuesHashFile.length(); + + for (long i = 4; i < fileSize; i++) { + restoreFile(dataDir, "values.hash"); + overwriteByteInFile(valuesHashFile, i, 0x0); + repo.init(); + List<Statement> list = getStatements(); + assertEquals(6, list.size(), "Failed at byte position " + i); + repo.shutDown(); + } } @NotNull @@ -226,7 +272,10 @@ private List<Statement> getStatements() { } @AfterEach - public void after() { + public void after() throws IOException { repo.shutDown(); + restoreFile(dataDir, "values.hash"); + restoreFile(dataDir, "values.id"); + restoreFile(dataDir, "values.dat"); } }