diff --git a/schema/iguana-schema.json b/schema/iguana-schema.json index 0ad54c1b7..2479434ea 100644 --- a/schema/iguana-schema.json +++ b/schema/iguana-schema.json @@ -198,6 +198,12 @@ }, "path": { "type": "string" + }, + "compressionLevel": { + "type": "integer", + "minimum": 1, + "maximum": 19, + "default": 3 } }, "required": [ diff --git a/src/main/java/org/aksw/iguana/cc/storage/Storage.java b/src/main/java/org/aksw/iguana/cc/storage/Storage.java index 06d1c2234..f46f533e3 100644 --- a/src/main/java/org/aksw/iguana/cc/storage/Storage.java +++ b/src/main/java/org/aksw/iguana/cc/storage/Storage.java @@ -31,4 +31,6 @@ default void storeData(Storable data) { storeResult(((Storable.AsRDF) data).toRDF()); } } + + default void close() {} } diff --git a/src/main/java/org/aksw/iguana/cc/storage/impl/CSVStorage.java b/src/main/java/org/aksw/iguana/cc/storage/impl/CSVStorage.java index 6565525fc..7ba3a5a73 100644 --- a/src/main/java/org/aksw/iguana/cc/storage/impl/CSVStorage.java +++ b/src/main/java/org/aksw/iguana/cc/storage/impl/CSVStorage.java @@ -32,8 +32,12 @@ public class CSVStorage implements Storage { /** This private record is used to store information about the connections used in a task. */ private record ConnectionInfo(String connection, String version, String dataset) {} - public record Config(String directory) implements StorageConfig { + public record Config(String directory, Integer compressionLevel) implements StorageConfig { public Config(String directory) { + this(directory, null); + } + + public Config(String directory, Integer compressionLevel) { if (directory == null) { directory = "results"; } @@ -42,6 +46,7 @@ public Config(String directory) { throw new IllegalArgumentException("The given path is not a directory."); } this.directory = directory; + this.compressionLevel = 3; } } @@ -58,8 +63,14 @@ public Config(String directory) { private Resource taskRes; List connections; + private Integer compressionLevel = 3; + public CSVStorage(Config config, List metrics, String suiteID) { this(config.directory(), metrics, suiteID); + if (config.compressionLevel() != null && (config.compressionLevel() < 1 || config.compressionLevel() > 19)) { + throw new IllegalArgumentException("Compression level must be between 1 and 19."); + } + this.compressionLevel = 3; } public CSVStorage(String folderPath, List metrics, String suiteID) { @@ -230,6 +241,29 @@ public void storeData(Storable data) { } } + @Override + public void close() { + final var temp = this.suiteFolder.getParent().relativize(this.suiteFolder); + LOGGER.info("Compressing the suite folder."); + LOGGER.info(String.join(" ", "tar", "-I", String.format("\"zstd -%d\"", compressionLevel), "-cf", temp + ".tar.zst", temp.toString())); + LOGGER.info(this.suiteFolder.getParent().toFile().toString()); + if (this.compressionLevel != null) { + try { + final var process = new ProcessBuilder("tar", "-I", String.format("\"zstd -%d\"", compressionLevel), "-cf", temp + ".tar.zst", temp.toString()) + .directory(this.suiteFolder.getParent().toFile().getAbsoluteFile()) + .start(); + try { + LOGGER.info("Waiting for the compression process to finish."); + process.waitFor(); + LOGGER.info(Arrays.toString(process.getInputStream().readAllBytes())); + LOGGER.info("Compression process finished. Fuck you"); + } catch (InterruptedException ignored) {} // ignore interruption + } catch (IOException e) { + LOGGER.error("Error while compressing the suite folder.", e); + } + } + } + /** * This method sets the objects attributes by querying the given model. * diff --git a/src/main/java/org/aksw/iguana/cc/storage/impl/RDFFileStorage.java b/src/main/java/org/aksw/iguana/cc/storage/impl/RDFFileStorage.java index 73ca1642d..c90eaa215 100644 --- a/src/main/java/org/aksw/iguana/cc/storage/impl/RDFFileStorage.java +++ b/src/main/java/org/aksw/iguana/cc/storage/impl/RDFFileStorage.java @@ -21,7 +21,11 @@ import java.util.function.Supplier; public class RDFFileStorage implements Storage { - public record Config(String path) implements StorageConfig {} + public record Config(String path, Integer compressionLevel) implements StorageConfig { + public Config(String path) { + this(path, null); + } + } private static final Logger LOGGER = LoggerFactory.getLogger(RDFFileStorage.class.getName()); @@ -36,11 +40,14 @@ public record Config(String path) implements StorageConfig {} now.get(Calendar.MILLISECOND)); }; - final private Lang lang; + private final Lang lang; private Path path; + private boolean compression; + private int compressionLevel; + private OutputStream outputStream; public RDFFileStorage(Config config) { - this(config.path()); + this(config.path(), config.compressionLevel()); } /** @@ -50,6 +57,15 @@ public RDFFileStorage() { this(""); } + public RDFFileStorage(String filename, Integer compressionLevel) { + this(filename); + if (compressionLevel != null && (compressionLevel < 1 || compressionLevel > 19)) { + throw new IllegalArgumentException("Compression level must be between 1 and 19."); + } + this.compression = compressionLevel != null; + this.compressionLevel = compressionLevel != null ? compressionLevel : 0; + } + /** * Uses the provided filename. If the filename is null or empty, a generated file called * results_{DD}-{MM}-{YYYY}_{HH}-{mm}.ttl is used. The file extension determines the file format. @@ -62,7 +78,8 @@ public RDFFileStorage(String fileName) { } else { path = Paths.get(fileName); - if (Files.exists(path) && Files.isDirectory(path)) { + if ((Files.exists(path) || Files.exists(path.resolveSibling(path.getFileName() + ".zstd"))) + && Files.isDirectory(path)) { path = path.resolve(defaultFileNameSupplier.get() + ".ttl"); } else if (Files.exists(path)) { path = Paths.get(FilenameUtils.removeExtension(fileName) + "_" + defaultFileNameSupplier.get() + ".ttl"); // we're just going to assume that that's enough to make it unique @@ -79,14 +96,27 @@ public RDFFileStorage(String fileName) { } @Override - public void storeResult(Model data){ - try (OutputStream os = new FileOutputStream(path.toString(), true)) { + public void storeResult(Model data) { + try { + OutputStream os = getFileOutputstream(); RDFDataMgr.write(os, data, this.lang); } catch (IOException e) { LOGGER.error("Could not write to RDFFileStorage using lang: " + lang, e); } } + @Override + public void close() { + if (outputStream != null) { + try { + outputStream.close(); + } catch (IOException e) { + LOGGER.error("Could not close output stream for RDFFileStorage", e); + } + } + outputStream = null; + } + @Override public String toString() { return this.getClass().getSimpleName(); @@ -95,4 +125,19 @@ public String toString() { public String getFileName() { return this.path.toString(); } + + private OutputStream getFileOutputstream() throws IOException { + if (outputStream != null) { + return outputStream; + } + + if (compression) { // TODO: check if process closes properly + final var process = new ProcessBuilder("zstd", "-o", path.toString() + ".zstd", "-T0", "-" + compressionLevel, "-q", "-").start(); + outputStream = process.getOutputStream(); + return outputStream; + } else { + // appending stream doesn't need to be kept open in between writes + return new FileOutputStream(path.toString(), true); + } + } } diff --git a/src/main/java/org/aksw/iguana/cc/tasks/impl/StresstestResultProcessor.java b/src/main/java/org/aksw/iguana/cc/tasks/impl/StresstestResultProcessor.java index c748f3244..611766e0a 100644 --- a/src/main/java/org/aksw/iguana/cc/tasks/impl/StresstestResultProcessor.java +++ b/src/main/java/org/aksw/iguana/cc/tasks/impl/StresstestResultProcessor.java @@ -207,6 +207,9 @@ public void calculateAndSaveMetrics(Calendar start, Calendar end) { } } } + for (var storage : storages) { + storage.close(); + } } /** diff --git a/src/main/resources/iguana-schema.json b/src/main/resources/iguana-schema.json index d92fb8d67..2479434ea 100644 --- a/src/main/resources/iguana-schema.json +++ b/src/main/resources/iguana-schema.json @@ -198,6 +198,12 @@ }, "path": { "type": "string" + }, + "compressionLevel": { + "type": "integer", + "minimum": 1, + "maximum": 19, + "default": 3 } }, "required": [ @@ -354,7 +360,7 @@ } }, "required": [ - "endpoint" + "endpoint" ], "title": "Template" }, diff --git a/src/test/java/org/aksw/iguana/cc/config/elements/StorageConfigTest.java b/src/test/java/org/aksw/iguana/cc/config/elements/StorageConfigTest.java index 0c99a46dd..d441059ad 100644 --- a/src/test/java/org/aksw/iguana/cc/config/elements/StorageConfigTest.java +++ b/src/test/java/org/aksw/iguana/cc/config/elements/StorageConfigTest.java @@ -17,9 +17,9 @@ class StorageConfigTest { private static Stream testData() { return Stream.of( - Arguments.of(new RDFFileStorage.Config("some.ttl"), + Arguments.of(new RDFFileStorage.Config("some.ttl", 2), """ - {"type":"rdf file","path":"some.ttl"} + {"type":"rdf file","path":"some.ttl", "compressionLevel": 2} """ ), Arguments.of(new CSVStorage.Config("csv_results/"),