From c5d7a43e396b363c675ed69883f7088bf075f54b Mon Sep 17 00:00:00 2001 From: Mikhail Podolskiy Date: Thu, 15 Aug 2024 14:10:44 +0200 Subject: [PATCH] #2487 Validate EML --- .../java/org/gbif/ipt/task/GenerateDwca.java | 30 ++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/src/main/java/org/gbif/ipt/task/GenerateDwca.java b/src/main/java/org/gbif/ipt/task/GenerateDwca.java index d9ddc1fe9a..7c4c40d0cc 100644 --- a/src/main/java/org/gbif/ipt/task/GenerateDwca.java +++ b/src/main/java/org/gbif/ipt/task/GenerateDwca.java @@ -19,6 +19,7 @@ import org.gbif.dwc.ArchiveFile; import org.gbif.dwc.DwcFiles; import org.gbif.dwc.MetaDescriptorWriter; +import org.gbif.dwc.MetadataException; import org.gbif.dwc.terms.DwcTerm; import org.gbif.dwc.terms.Term; import org.gbif.dwc.terms.TermFactory; @@ -34,6 +35,9 @@ import org.gbif.ipt.service.admin.VocabulariesManager; import org.gbif.ipt.service.manage.SourceManager; import org.gbif.ipt.utils.MapUtils; +import org.gbif.metadata.eml.EMLProfileVersion; +import org.gbif.metadata.eml.EmlValidator; +import org.gbif.metadata.eml.InvalidEmlException; import org.gbif.utils.file.ClosableReportingIterator; import org.gbif.utils.file.CompressionUtil; import org.gbif.utils.file.csv.CSVReader; @@ -47,6 +51,7 @@ import java.io.StringWriter; import java.io.Writer; import java.math.BigDecimal; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; @@ -75,6 +80,7 @@ import com.google.inject.Inject; import com.google.inject.assistedinject.Assisted; +import org.xml.sax.SAXException; public class GenerateDwca extends ReportingTask implements Callable> { @@ -386,6 +392,28 @@ private void validate() throws GeneratorException, InterruptedException { try { // retrieve newly generated archive - decompressed Archive arch = DwcFiles.fromLocation(dwcaFolder.toPath()); + + // validate EML + try { + addMessage(Level.INFO, "? Validating EML file"); + EmlValidator emlValidator = org.gbif.metadata.eml.EmlValidator.newValidator(EMLProfileVersion.GBIF_1_3); + String emlString = arch.getMetadata(); + emlValidator.validate(emlString); + addMessage(Level.INFO, "✓ Validated EML file"); + } catch (MetadataException | SAXException e) { + // some error validating this file, report + log.error("Exception caught while validating EML file", e); + addMessage(Level.ERROR, "Failed to validate EML file"); + setState(e); + throw new GeneratorException("Problem occurred while validating DwC-A (EML)", e); + } catch (InvalidEmlException e) { + // InvalidEmlException + log.error("Invalid EML", e); + addMessage(Level.ERROR, "Invalid EML file: " + e.getMessage()); + setState(e); + throw new GeneratorException("Invalid EML", e); + } + // populate basisOfRecord lookup HashMap loadBasisOfRecordMapFromVocabulary(); // perform validation on core file (includes core ID and basisOfRecord validation) @@ -402,7 +430,7 @@ private void validate() throws GeneratorException, InterruptedException { throw new GeneratorException("Problem occurred while validating DwC-A", e); } // final reporting - addMessage(Level.INFO, "Archive validated"); + addMessage(Level.INFO, "✓ Archive validated"); } /**