Skip to content

Commit

Permalink
Write number of processed records to parameters File #344
Browse files Browse the repository at this point in the history
  • Loading branch information
pkiraly committed Nov 2, 2023
1 parent c57a545 commit c594b3e
Show file tree
Hide file tree
Showing 21 changed files with 84 additions and 33 deletions.
13 changes: 13 additions & 0 deletions src/main/java/de/gwdg/metadataqa/marc/Utils.java
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,11 @@

import de.gwdg.metadataqa.marc.definition.MarcVersion;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.time.DurationFormatUtils;

import java.io.Serializable;
import java.math.BigDecimal;
import java.time.Duration;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Base64;
Expand Down Expand Up @@ -211,4 +213,15 @@ public static String base64decode(String raw) {
Base64.Decoder dec = Base64.getDecoder();
return new String(dec.decode(raw.replaceAll("^base64:", "")));
}

/**
 * Creates a human readable duration string, such as '03:24:12' or '1d 12:34:56'.
 *
 * <p>The day prefix (e.g. {@code "1d "}) is emitted only when the duration spans at least
 * one full day; otherwise only the zero-padded {@code HH:mm:ss} part is returned.
 * Fractions of a second are truncated. A negative input is treated as zero instead of
 * raising an exception: callers compute the value from wall-clock timestamps, which are
 * not guaranteed to be monotonic.
 *
 * @param duration Duration in milliseconds
 * @return a human readable duration string
 */
public static String formatDuration(long duration) {
  // Clamp so that a clock adjustment between the two timestamps cannot make formatting fail.
  Duration elapsed = Duration.ofMillis(Math.max(0L, duration));

  long days = elapsed.toDays();
  long hours = elapsed.toHours() % 24;
  long minutes = elapsed.toMinutes() % 60;
  long seconds = elapsed.getSeconds() % 60;

  // Locale.ROOT keeps the digits ASCII regardless of the JVM's default locale.
  String clock = String.format(java.util.Locale.ROOT, "%02d:%02d:%02d", hours, minutes, seconds);
  return days > 0 ? days + "d " + clock : clock;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -112,14 +112,14 @@ public void fileProcessed() {
}

@Override
public void afterIteration(int numberOfprocessedRecords) {
public void afterIteration(int numberOfprocessedRecords, long duration) {
printAuthoritiesByCategories();
printAuthoritiesBySchema();
printAuthoritiesByRecords();
printAuthoritiesHistogram();
printFrequencyExamples();
printAuthoritiesSubfieldsStatistics();
saveParameters("authorities.params.json", parameters);
saveParameters("authorities.params.json", parameters, Map.of("numberOfprocessedRecords", numberOfprocessedRecords, "duration", duration));
}

private void printAuthoritiesByCategories() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
import java.nio.charset.Charset;
import java.nio.file.Path;
import java.util.List;
import java.util.Map;
import java.util.logging.Level;
import java.util.logging.Logger;

Expand Down Expand Up @@ -119,8 +120,8 @@ public void fileProcessed() {
}

@Override
public void afterIteration(int numberOfprocessedRecords) {
saveParameters("bl-classifications.params.json", parameters);
public void afterIteration(int numberOfprocessedRecords, long duration) {
saveParameters("bl-classifications.params.json", parameters, Map.of("numberOfprocessedRecords", numberOfprocessedRecords, "duration", duration));
logger.info("after iteration: " + numberOfprocessedRecords);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -126,15 +126,15 @@ public void fileProcessed() {
}

@Override
public void afterIteration(int numberOfprocessedRecords) {
public void afterIteration(int numberOfprocessedRecords, long duration) {
printClassificationsBySchema();
printClassificationsByRecords();
printClassificationsHistogram();
printFrequencyExamples();
printSchemaSubfieldsStatistics();
if (parameters.doCollectCollocations())
printClassificationsCollocation();
saveParameters("classifications.params.json", parameters);
saveParameters("classifications.params.json", parameters, Map.of("numberOfprocessedRecords", numberOfprocessedRecords, "duration", duration));
}

private void printClassificationsCollocation() {
Expand Down
4 changes: 2 additions & 2 deletions src/main/java/de/gwdg/metadataqa/marc/cli/Completeness.java
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,7 @@ public void fileProcessed() {
}

@Override
public void afterIteration(int numberOfprocessedRecords) {
public void afterIteration(int numberOfprocessedRecords, long duration) {
String fileExtension = ".csv";
final char separator = getSeparator(parameters.getFormat());
if (parameters.getFormat().equals(ValidationErrorFormat.TAB_SEPARATED)) {
Expand All @@ -191,7 +191,7 @@ public void afterIteration(int numberOfprocessedRecords) {
savePackages(fileExtension, separator);
saveMarcElements(fileExtension, separator);
}
saveParameters("completeness.params.json", parameters);
saveParameters("completeness.params.json", parameters, Map.of("numberOfprocessedRecords", numberOfprocessedRecords, "duration", duration));
}

private void saveLibraries003(String fileExtension, char separator) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ public void fileProcessed() {
}

@Override
public void afterIteration(int numberOfprocessedRecords) {
public void afterIteration(int numberOfprocessedRecords, long duration) {
// do nothing
}

Expand Down
2 changes: 1 addition & 1 deletion src/main/java/de/gwdg/metadataqa/marc/cli/Formatter.java
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,7 @@ public void fileProcessed() {
}

@Override
public void afterIteration(int numberOfprocessedRecords) {
public void afterIteration(int numberOfprocessedRecords, long duration) {
if (writer != null) {
try {
writer.close();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,7 @@ public void fileProcessed() {
}

@Override
public void afterIteration(int numberOfprocessedRecords) {
public void afterIteration(int numberOfprocessedRecords, long duration) {
String fileExtension = ".csv";
final char separator = getSeparator(parameters.getFormat());
if (parameters.getFormat().equals(ValidationErrorFormat.TAB_SEPARATED)) {
Expand All @@ -208,7 +208,7 @@ public void afterIteration(int numberOfprocessedRecords) {
saveHistogram(percentHistogram, fileExtension, separator);

saveMapping(fileExtension, separator);
saveParameters("functions.params.json", parameters);
saveParameters("functions.params.json", parameters, Map.of("numberOfprocessedRecords", numberOfprocessedRecords, "duration", duration));
}

private void saveMapping(String fileExtension,
Expand Down
4 changes: 2 additions & 2 deletions src/main/java/de/gwdg/metadataqa/marc/cli/MarcToSolr.java
Original file line number Diff line number Diff line change
Expand Up @@ -167,9 +167,9 @@ public void fileProcessed() {
}

@Override
public void afterIteration(int numberOfprocessedRecords) {
public void afterIteration(int numberOfprocessedRecords, long duration) {
client.commit();
saveParameters("marctosolr.params.json", parameters);
saveParameters("marctosolr.params.json", parameters, Map.of("numberOfprocessedRecords", numberOfprocessedRecords, "duration", duration));
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.logging.Level;
import java.util.logging.Logger;
Expand Down Expand Up @@ -127,14 +128,14 @@ public void fileProcessed() {
}

@Override
public void afterIteration(int numberOfprocessedRecords) {
public void afterIteration(int numberOfprocessedRecords, long duration) {
try {
networkWriter.close();
} catch (IOException e) {
logger.severe("Failed to close networkWriter. " + e.getLocalizedMessage());
logger.log(Level.SEVERE, "afterIteration", e);
}
saveParameters("network.params.json", parameters);
saveParameters("network.params.json", parameters, Map.of("numberOfprocessedRecords", numberOfprocessedRecords, "duration", duration));
}

@Override
Expand Down
18 changes: 18 additions & 0 deletions src/main/java/de/gwdg/metadataqa/marc/cli/QACli.java
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.ObjectMapper;
import de.gwdg.metadataqa.marc.CsvUtils;
import de.gwdg.metadataqa.marc.Utils;
import de.gwdg.metadataqa.marc.cli.parameters.CommonParameters;
import de.gwdg.metadataqa.marc.cli.parameters.ValidatorParameters;
import de.gwdg.metadataqa.marc.dao.record.BibliographicRecord;
Expand All @@ -11,6 +12,7 @@
import de.gwdg.metadataqa.marc.utils.pica.path.PicaPathParser;
import org.apache.commons.io.FileUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.time.DurationFormatUtils;

import java.io.File;
import java.io.IOException;
Expand All @@ -21,9 +23,11 @@
import java.nio.file.Paths;
import java.nio.file.attribute.BasicFileAttributes;
import java.nio.file.attribute.FileTime;
import java.time.Duration;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
Expand All @@ -50,12 +54,26 @@ protected void initializeGroups(String groupBy, boolean isPica) {
}

/**
 * Saves the parameters under the given file name without any extra result entries.
 *
 * <p>Convenience overload: delegates to the three-argument variant, passing
 * {@code null} as the results map.
 *
 * @param fileName name of the file to write
 * @param parameters the parameters object to be saved
 */
protected void saveParameters(String fileName, T parameters) {
saveParameters(fileName, parameters, null);
}

protected void saveParameters(String fileName, T parameters, Map<String, Object> results) {
// Map<String, Object> responseDao = Map.of("parameters", parameters, "results", results);
ObjectMapper mapper = new ObjectMapper();
try {
String json = mapper.writeValueAsString(parameters);
Map<String, Object> configuration = mapper.readValue(json, new TypeReference<>(){});
configuration.put("mqaf.version", de.gwdg.metadataqa.api.cli.Version.getVersion());
configuration.put("qa-catalogue.version", de.gwdg.metadataqa.marc.cli.Version.getVersion());
if (results != null)
for (Map.Entry<String, Object> entry : results.entrySet()) {
Object value = entry.getValue();
if (entry.getKey().equals("duration")) {
value = Utils.formatDuration((long) value);
}
configuration.put(entry.getKey(), value);
}

File configFile = Paths.get(parameters.getOutputDir(), fileName).toFile();
logger.log(Level.INFO, "Saving configuration to {0}.", configFile.getAbsolutePath());
mapper.writeValue(configFile, configuration);
Expand Down
4 changes: 2 additions & 2 deletions src/main/java/de/gwdg/metadataqa/marc/cli/SerialScore.java
Original file line number Diff line number Diff line change
Expand Up @@ -129,9 +129,9 @@ public void fileProcessed() {
}

@Override
public void afterIteration(int numberOfprocessedRecords) {
public void afterIteration(int numberOfprocessedRecords, long duration) {
printHistogram();
saveParameters("serials.params.json", parameters);
saveParameters("serials.params.json", parameters, Map.of("numberOfprocessedRecords", numberOfprocessedRecords, "duration", duration));
}

private void printHistogram() {
Expand Down
5 changes: 3 additions & 2 deletions src/main/java/de/gwdg/metadataqa/marc/cli/Shacl4bib.java
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import java.io.Serializable;
import java.nio.file.Path;
import java.util.List;
import java.util.Map;
import java.util.logging.Level;
import java.util.logging.Logger;

Expand Down Expand Up @@ -125,9 +126,9 @@ public void fileProcessed() {
}

@Override
public void afterIteration(int numberOfprocessedRecords) {
public void afterIteration(int numberOfprocessedRecords, long duration) {
copySchaclFileToOutputDir();
saveParameters("shacl4bib.params.json", parameters);
saveParameters("shacl4bib.params.json", parameters, Map.of("numberOfprocessedRecords", numberOfprocessedRecords, "duration", duration));
}

private void copySchaclFileToOutputDir() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -144,8 +144,8 @@ public void fileProcessed() {
}

@Override
public void afterIteration(int numberOfprocessedRecords) {
saveParameters("shelf-ready-completeness.params.json", parameters);
public void afterIteration(int numberOfprocessedRecords, long duration) {
saveParameters("shelf-ready-completeness.params.json", parameters, Map.of("numberOfprocessedRecords", numberOfprocessedRecords, "duration", duration));
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -131,8 +131,8 @@ public void fileProcessed() {
}

@Override
public void afterIteration(int numberOfprocessedRecords) {
saveParameters("tt-completeness.params.json", parameters);
public void afterIteration(int numberOfprocessedRecords, long duration) {
saveParameters("tt-completeness.params.json", parameters, Map.of("numberOfprocessedRecords", numberOfprocessedRecords, "duration", duration));
}

@Override
Expand Down
4 changes: 2 additions & 2 deletions src/main/java/de/gwdg/metadataqa/marc/cli/ValidatorCli.java
Original file line number Diff line number Diff line change
Expand Up @@ -291,7 +291,7 @@ private void processSummary(BibliographicRecord marcRecord,
}

@Override
public void afterIteration(int numberOfprocessedRecords) {
public void afterIteration(int numberOfprocessedRecords, long duration) {
logger.info("printCounter");
this.numberOfprocessedRecords = numberOfprocessedRecords;
printCounter();
Expand All @@ -314,7 +314,7 @@ public void afterIteration(int numberOfprocessedRecords) {
copySchemaFileToOutputDir();

logger.info("all printing is DONE");
saveParameters("validation.params.json", parameters);
saveParameters("validation.params.json", parameters, Map.of("numberOfprocessedRecords", numberOfprocessedRecords, "duration", duration));
}

private void copySchemaFileToOutputDir() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ public interface BibliographicInputProcessor {
void beforeIteration();
void fileOpened(Path path);
void fileProcessed();
void afterIteration(int numberOfprocessedRecords);
void afterIteration(int numberOfprocessedRecords, long duration);
void printHelp(Options options);
boolean readyToProcess();
}
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package de.gwdg.metadataqa.marc.cli.utils;

import de.gwdg.metadataqa.marc.Utils;
import de.gwdg.metadataqa.marc.cli.parameters.CommonParameters;
import de.gwdg.metadataqa.marc.dao.Leader;
import de.gwdg.metadataqa.marc.MarcFactory;
Expand Down Expand Up @@ -48,6 +49,7 @@ public class RecordIterator {
private PicaSchemaManager picaSchema;
private String status = "waits";
private boolean processWithEroors = false;
private long start;

public RecordIterator(BibliographicInputProcessor processor) {
this.processor = processor;
Expand All @@ -56,7 +58,7 @@ public RecordIterator(BibliographicInputProcessor processor) {

public void start() {

long start = System.currentTimeMillis();
start = System.currentTimeMillis();
processor.beforeIteration();
parameters = processor.getParameters();

Expand Down Expand Up @@ -87,12 +89,12 @@ public void start() {
}
}

processor.afterIteration(recordNumber);

long end = System.currentTimeMillis();
processor.afterIteration(recordNumber, (end - start));

long duration = (end - start) / 1000;
if (parameters.doLog())
logger.log(Level.INFO, "Bye! It took: " + DurationFormatUtils.formatDuration(end - start, "d HH:mm:ss", true));
logger.log(Level.INFO, "Bye! It took: " + Utils.formatDuration(end - start));
// logger.log(Level.INFO, "Bye! It took: " + LocalTime.MIN.plusSeconds(duration).format(DateTimeFormatter.ofPattern("d HH:mm:ss")));

status = "done";
Expand Down Expand Up @@ -278,4 +280,8 @@ public String getStatus() {
/**
 * Sets the {@code processWithEroors} flag of this iterator (the field defaults to
 * {@code false}).
 *
 * <p>NOTE(review): how the flag is consumed is not visible here; presumably it decides
 * whether records that produced errors are still handed to the processor — confirm.
 * The name looks like a misspelling of "Errors"; it is kept because callers depend on it.
 *
 * @param processWithEroors the new value of the flag
 */
public void setProcessWithEroors(boolean processWithEroors) {
this.processWithEroors = processWithEroors;
}

/**
 * Returns the value of the {@code start} field, which {@code start()} sets from
 * {@code System.currentTimeMillis()} when the iteration begins.
 *
 * @return the start timestamp in milliseconds; {@code 0} if the field has not been assigned yet
 */
public long getStart() {
return start;
}
}
7 changes: 7 additions & 0 deletions src/test/java/de/gwdg/metadataqa/marc/UtilsTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -155,4 +155,11 @@ public void base64decode() {
"[email protected] !~ \"^L\" && [email protected] !~ \"^..[iktN]\" && ([email protected] !~ \"^.v\" || 021A.a?)\n",
Utils.base64decode("MDAyQC4wICF+ICJeTCIgJiYgMDAyQC4wICF+ICJeLi5baWt0Tl0iICYmICgwMDJALjAgIX4gIl4udiIgfHwgMDIxQS5hPykK"));
}

@Test
public void formatDuration() {
  final long oneSecond = 1000L;
  final long oneDay = 24L * 60 * 60 * oneSecond;

  // Below one day only the HH:mm:ss part is expected.
  String zero = Utils.formatDuration(0L);
  assertEquals("00:00:00", zero);

  // From one full day on, the value is prefixed with the number of days.
  String exactlyOneDay = Utils.formatDuration(oneDay);
  assertEquals("1d 00:00:00", exactlyOneDay);

  String oneDayAndOneSecond = Utils.formatDuration(oneDay + oneSecond);
  assertEquals("1d 00:00:01", oneDayAndOneSecond);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -315,7 +315,9 @@ public void completeness_pica_groupBy_file() throws Exception {
assertTrue(line.contains("\"replacementInControlFields\":null,"));
assertTrue(line.contains("\"marc21\":false,"));
assertTrue(line.contains("\"mqaf.version\":\"0.9.3\","));
assertTrue(line.contains("\"qa-catalogue.version\":\"0.8.0-SNAPSHOT\"}"));
assertTrue(line.contains("\"qa-catalogue.version\":\"0.8.0-SNAPSHOT\""));
assertTrue(line.contains("\"duration\":\"00:00:00\""));
assertTrue(line.contains("\"numberOfprocessedRecords\":10"));

} else if (outputFile.equals("libraries.csv")) {
output = new File(outputDir, outputFile);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -246,7 +246,9 @@ public void validate_pica_groupBy() throws Exception {
assertTrue(line.contains("\"replacementInControlFields\":null,"));
assertTrue(line.contains("\"marc21\":false,"));
assertTrue(line.contains("\"mqaf.version\":\"0.9.3\","));
assertTrue(line.contains("\"qa-catalogue.version\":\"0.8.0-SNAPSHOT\"}"));
assertTrue(line.contains("\"qa-catalogue.version\":\"0.8.0-SNAPSHOT\""));
assertTrue(line.contains("\"duration\":\"00:00:00\""));
assertTrue(line.contains("\"numberOfprocessedRecords\":10"));

} else {
fail("Untested output file: " + outputFile);
Expand Down

0 comments on commit c594b3e

Please sign in to comment.