From e2d31129e444e2ee18e803a255328e1684a00f5c Mon Sep 17 00:00:00 2001 From: helkv Date: Wed, 23 Aug 2023 11:24:03 +0200 Subject: [PATCH 1/8] #61 First draft: Add 'Funding Reference' to DataCite Metadata XML --- .../dataverse/DOIDataCiteRegisterService.java | 51 +++++++++++++++++-- 1 file changed, 47 insertions(+), 4 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DOIDataCiteRegisterService.java b/src/main/java/edu/harvard/iq/dataverse/DOIDataCiteRegisterService.java index b54dfffd1d2..414f1e3e7da 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DOIDataCiteRegisterService.java +++ b/src/main/java/edu/harvard/iq/dataverse/DOIDataCiteRegisterService.java @@ -7,10 +7,7 @@ import edu.harvard.iq.dataverse.branding.BrandingUtil; -import java.io.ByteArrayOutputStream; -import java.io.IOException; -import java.io.InputStream; -import java.io.UnsupportedEncodingException; +import java.io.*; import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; @@ -29,6 +26,7 @@ import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; +import org.jsoup.parser.Parser; import org.jsoup.select.Elements; /** @@ -558,12 +556,57 @@ public String generateXML(DvObject dvObject) { String relIdentifiers = generateRelatedIdentifiers(dvObject); + xmlMetadata = this.addGrantInformation(dvObject, xmlMetadata); + xmlMetadata = xmlMetadata.replace("${relatedIdentifiers}", relIdentifiers); xmlMetadata = xmlMetadata.replace("{$contributors}", contributorsElement.toString()); return xmlMetadata; } + private String addGrantInformation(DvObject dvObject, String xmlMetadata) { + StringBuilder sb = new StringBuilder(); + + Dataset dataset = (Dataset) dvObject; + String grantAgency = ""; + String grantNumber = ""; + if (dvObject.isInstanceofDataset()) { + for (DatasetField field : dataset.getLatestVersion().getDatasetFields()) { + if (field.getDatasetFieldType().getName().equals("grantNumber")) { + for (DatasetFieldCompoundValue 
compoundValue : field.getDatasetFieldCompoundValues()) { + for (DatasetField child : compoundValue.getChildDatasetFields()) { + switch (child.getDatasetFieldType().getName()) { + case "grantNumberAgency": grantAgency = child.getValue(); break; + case "grantNumberValue": grantNumber = child.getValue(); break; + default: break; + } + } + } + //TODO Check if grantInformation is not empty... + //sb.append(""); + sb.append(""); + sb.append("" + grantAgency + ""); + sb.append("" + grantNumber + ""); + sb.append(""); + //sb.append(""); + } + } + }else{ + //TODO ... + } + + String grantInformation = sb.toString(); + + Document doc = Jsoup.parse(xmlMetadata, "", Parser.xmlParser()); + Element resourceElement = doc.select("resource").get(0); + Element grantElement = doc.createElement("fundingReferences"); + grantElement.html(grantInformation); + resourceElement.appendChild(grantElement); + xmlMetadata = doc.toString(); + + return xmlMetadata; + } + private String generateRelatedIdentifiers(DvObject dvObject) { StringBuilder sb = new StringBuilder(); From ba5fbff5567d8e7201d7244b1cd8fc0057a049d3 Mon Sep 17 00:00:00 2001 From: helkv Date: Wed, 23 Aug 2023 16:19:25 +0200 Subject: [PATCH 2/8] #61 First draft: Use pretty print for DataCite Metadata XML Problem: formatAsInlineTags cannot be changed in Jsoup => all XML elements and values are placed on new lines --- .../edu/harvard/iq/dataverse/DOIDataCiteRegisterService.java | 1 + 1 file changed, 1 insertion(+) diff --git a/src/main/java/edu/harvard/iq/dataverse/DOIDataCiteRegisterService.java b/src/main/java/edu/harvard/iq/dataverse/DOIDataCiteRegisterService.java index 414f1e3e7da..4b36f7cf67f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DOIDataCiteRegisterService.java +++ b/src/main/java/edu/harvard/iq/dataverse/DOIDataCiteRegisterService.java @@ -602,6 +602,7 @@ private String addGrantInformation(DvObject dvObject, String xmlMetadata) { Element grantElement = doc.createElement("fundingReferences"); 
grantElement.html(grantInformation); resourceElement.appendChild(grantElement); + doc.outputSettings().indentAmount(4).prettyPrint(true); //Problem: formatAsInlineTags can not be changed xmlMetadata = doc.toString(); return xmlMetadata; From 8dc51a7c467c48ec69446413f6f7b81ea18a3a84 Mon Sep 17 00:00:00 2001 From: helkv Date: Mon, 28 Aug 2023 09:01:47 +0200 Subject: [PATCH 3/8] #61 First draft: Use workaround to format inlineTags in Jsoup Problem: Java reflection should not be used to change the inlineTags in Jsoup --- .../dataverse/DOIDataCiteRegisterService.java | 43 ++++++++++++++++++- 1 file changed, 42 insertions(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DOIDataCiteRegisterService.java b/src/main/java/edu/harvard/iq/dataverse/DOIDataCiteRegisterService.java index 4b36f7cf67f..a4ee920aa38 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DOIDataCiteRegisterService.java +++ b/src/main/java/edu/harvard/iq/dataverse/DOIDataCiteRegisterService.java @@ -8,6 +8,8 @@ import edu.harvard.iq.dataverse.branding.BrandingUtil; import java.io.*; +import java.lang.reflect.Field; +import java.lang.reflect.Method; import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; @@ -27,6 +29,7 @@ import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.parser.Parser; +import org.jsoup.parser.Tag; import org.jsoup.select.Elements; /** @@ -597,17 +600,55 @@ private String addGrantInformation(DvObject dvObject, String xmlMetadata) { String grantInformation = sb.toString(); + //TODO: Replace this Workaround to format leaf node tags as inline tags in Jsoup using reflection. 
+ this.formatLeafNodeAsInlineTagsInJsoup(xmlMetadata); Document doc = Jsoup.parse(xmlMetadata, "", Parser.xmlParser()); Element resourceElement = doc.select("resource").get(0); Element grantElement = doc.createElement("fundingReferences"); grantElement.html(grantInformation); resourceElement.appendChild(grantElement); - doc.outputSettings().indentAmount(4).prettyPrint(true); //Problem: formatAsInlineTags can not be changed + doc.outputSettings().indentAmount(4).prettyPrint(true); xmlMetadata = doc.toString(); return xmlMetadata; } + + /** + * + * This method is used to format leaf node tags as inline tags in Jsoup using reflection. + * See: https://github.com/jhy/jsoup/issues/1428 + * + * @param xmlMetadata + */ + private void formatLeafNodeAsInlineTagsInJsoup(String xmlMetadata){ + try { + Document doc = Jsoup.parse(xmlMetadata, "", Parser.xmlParser()); + + //String[] inlineTags = {"identifier", "publisher", "publicationYear", "description", "contributors", "funderName"}; + List inlineTags = new java.util.ArrayList(); + doc.getAllElements().forEach(element -> { + if (element.childrenSize() == 0) { + inlineTags.add(element.tagName()); + } + }); + + for(String tagName : inlineTags) { + Tag tag = Tag.valueOf(tagName); + Field field = null; + field = Tag.class.getDeclaredField("formatAsBlock"); + field.setAccessible(true); + field.set(tag, false); + + Method method = Tag.class.getDeclaredMethod("register", Tag.class); + method.setAccessible(true); + method.invoke(null, tag); + } + } catch (Exception e) { + logger.log(Level.WARNING, "Error changing the format for Jsoup: " + e.getMessage()); + } + } + private String generateRelatedIdentifiers(DvObject dvObject) { StringBuilder sb = new StringBuilder(); From d84e8e56a7d1d4c4e29ec63cfe8a491923374401 Mon Sep 17 00:00:00 2001 From: helkv Date: Mon, 28 Aug 2023 10:06:50 +0200 Subject: [PATCH 4/8] #61 Revert First draft (Jsoup) 3 commits --- .../dataverse/DOIDataCiteRegisterService.java | 93 +------------------ 1 file 
changed, 4 insertions(+), 89 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DOIDataCiteRegisterService.java b/src/main/java/edu/harvard/iq/dataverse/DOIDataCiteRegisterService.java index a4ee920aa38..b54dfffd1d2 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DOIDataCiteRegisterService.java +++ b/src/main/java/edu/harvard/iq/dataverse/DOIDataCiteRegisterService.java @@ -7,9 +7,10 @@ import edu.harvard.iq.dataverse.branding.BrandingUtil; -import java.io.*; -import java.lang.reflect.Field; -import java.lang.reflect.Method; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.UnsupportedEncodingException; import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; @@ -28,8 +29,6 @@ import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; -import org.jsoup.parser.Parser; -import org.jsoup.parser.Tag; import org.jsoup.select.Elements; /** @@ -559,96 +558,12 @@ public String generateXML(DvObject dvObject) { String relIdentifiers = generateRelatedIdentifiers(dvObject); - xmlMetadata = this.addGrantInformation(dvObject, xmlMetadata); - xmlMetadata = xmlMetadata.replace("${relatedIdentifiers}", relIdentifiers); xmlMetadata = xmlMetadata.replace("{$contributors}", contributorsElement.toString()); return xmlMetadata; } - private String addGrantInformation(DvObject dvObject, String xmlMetadata) { - StringBuilder sb = new StringBuilder(); - - Dataset dataset = (Dataset) dvObject; - String grantAgency = ""; - String grantNumber = ""; - if (dvObject.isInstanceofDataset()) { - for (DatasetField field : dataset.getLatestVersion().getDatasetFields()) { - if (field.getDatasetFieldType().getName().equals("grantNumber")) { - for (DatasetFieldCompoundValue compoundValue : field.getDatasetFieldCompoundValues()) { - for (DatasetField child : compoundValue.getChildDatasetFields()) { - switch (child.getDatasetFieldType().getName()) { - case 
"grantNumberAgency": grantAgency = child.getValue(); break; - case "grantNumberValue": grantNumber = child.getValue(); break; - default: break; - } - } - } - //TODO Check if grantInformation is not empty... - //sb.append(""); - sb.append(""); - sb.append("" + grantAgency + ""); - sb.append("" + grantNumber + ""); - sb.append(""); - //sb.append(""); - } - } - }else{ - //TODO ... - } - - String grantInformation = sb.toString(); - - //TODO: Replace this Workaround to format leaf node tags as inline tags in Jsoup using reflection. - this.formatLeafNodeAsInlineTagsInJsoup(xmlMetadata); - Document doc = Jsoup.parse(xmlMetadata, "", Parser.xmlParser()); - Element resourceElement = doc.select("resource").get(0); - Element grantElement = doc.createElement("fundingReferences"); - grantElement.html(grantInformation); - resourceElement.appendChild(grantElement); - doc.outputSettings().indentAmount(4).prettyPrint(true); - xmlMetadata = doc.toString(); - - return xmlMetadata; - } - - - /** - * - * This method is used to format leaf node tags as inline tags in Jsoup using reflection. 
- * See: https://github.com/jhy/jsoup/issues/1428 - * - * @param xmlMetadata - */ - private void formatLeafNodeAsInlineTagsInJsoup(String xmlMetadata){ - try { - Document doc = Jsoup.parse(xmlMetadata, "", Parser.xmlParser()); - - //String[] inlineTags = {"identifier", "publisher", "publicationYear", "description", "contributors", "funderName"}; - List inlineTags = new java.util.ArrayList(); - doc.getAllElements().forEach(element -> { - if (element.childrenSize() == 0) { - inlineTags.add(element.tagName()); - } - }); - - for(String tagName : inlineTags) { - Tag tag = Tag.valueOf(tagName); - Field field = null; - field = Tag.class.getDeclaredField("formatAsBlock"); - field.setAccessible(true); - field.set(tag, false); - - Method method = Tag.class.getDeclaredMethod("register", Tag.class); - method.setAccessible(true); - method.invoke(null, tag); - } - } catch (Exception e) { - logger.log(Level.WARNING, "Error changing the format for Jsoup: " + e.getMessage()); - } - } - private String generateRelatedIdentifiers(DvObject dvObject) { StringBuilder sb = new StringBuilder(); From c14c48a39b14a62c18c9a329b8e7ee7719213159 Mon Sep 17 00:00:00 2001 From: helkv Date: Mon, 28 Aug 2023 17:45:36 +0200 Subject: [PATCH 5/8] #61 Add 'Funding Reference' to DataCite Metadata XML (Second draft) Add 'Funding Reference' to DataCite Metadata XML using: - Java DOM Parser - Extra prettyPrint method - Inner Class DataCiteMetadataUtil --- .../dataverse/DOIDataCiteRegisterService.java | 102 ++++++++++++++++-- .../edu/harvard/iq/dataverse/prettyprint.xsl | 11 ++ 2 files changed, 104 insertions(+), 9 deletions(-) create mode 100644 src/main/resources/edu/harvard/iq/dataverse/prettyprint.xsl diff --git a/src/main/java/edu/harvard/iq/dataverse/DOIDataCiteRegisterService.java b/src/main/java/edu/harvard/iq/dataverse/DOIDataCiteRegisterService.java index b54dfffd1d2..fc061945a8f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DOIDataCiteRegisterService.java +++ 
b/src/main/java/edu/harvard/iq/dataverse/DOIDataCiteRegisterService.java @@ -7,15 +7,8 @@ import edu.harvard.iq.dataverse.branding.BrandingUtil; -import java.io.ByteArrayOutputStream; -import java.io.IOException; -import java.io.InputStream; -import java.io.UnsupportedEncodingException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.HashMap; -import java.util.List; -import java.util.Map; +import java.io.*; +import java.util.*; import java.util.logging.Level; import java.util.logging.Logger; import javax.ejb.EJB; @@ -23,6 +16,16 @@ import javax.persistence.EntityManager; import javax.persistence.PersistenceContext; import javax.persistence.TypedQuery; +import javax.xml.parsers.DocumentBuilder; +import javax.xml.parsers.DocumentBuilderFactory; +import javax.xml.parsers.ParserConfigurationException; +import javax.xml.transform.OutputKeys; +import javax.xml.transform.Transformer; +import javax.xml.transform.TransformerException; +import javax.xml.transform.TransformerFactory; +import javax.xml.transform.dom.DOMSource; +import javax.xml.transform.stream.StreamResult; +import javax.xml.transform.stream.StreamSource; import edu.harvard.iq.dataverse.settings.JvmSettings; import org.apache.commons.text.StringEscapeUtils; @@ -30,6 +33,8 @@ import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; +import org.xml.sax.InputSource; +import org.xml.sax.SAXException; /** * @@ -561,6 +566,30 @@ public String generateXML(DvObject dvObject) { xmlMetadata = xmlMetadata.replace("${relatedIdentifiers}", relIdentifiers); xmlMetadata = xmlMetadata.replace("{$contributors}", contributorsElement.toString()); + + xmlMetadata = extendXmlMetadata(dvObject, xmlMetadata); + + return xmlMetadata; + } + + private String extendXmlMetadata(DvObject dvObject, String xmlMetadata) { + try { + Optional grantAgency = DataCiteMetadataUtil.readDatasetFieldValue(dvObject, DatasetFieldConstant.grantNumber, 
DatasetFieldConstant.grantNumberAgency); + Optional grantNumber = DataCiteMetadataUtil.readDatasetFieldValue(dvObject, DatasetFieldConstant.grantNumber, DatasetFieldConstant.grantNumberValue); + + if(grantAgency.isPresent() || grantNumber.isPresent()) { + org.w3c.dom.Document xmlDocument = DataCiteMetadataUtil.parseXml(xmlMetadata); + DataCiteMetadataUtil.appendElementToDocument(xmlDocument, "resource", "fundingReferences", null); + DataCiteMetadataUtil.appendElementToDocument(xmlDocument, "fundingReferences", "fundingReference", null); + grantAgency.ifPresent(grantAgencyValue -> DataCiteMetadataUtil.appendElementToDocument(xmlDocument, "fundingReference", "funderName", grantAgencyValue)); + grantNumber.ifPresent(grantNumberValue -> DataCiteMetadataUtil.appendElementToDocument(xmlDocument, "fundingReference", "awardNumber", grantNumberValue)); + + xmlMetadata = DataCiteMetadataUtil.prettyPrintXML(xmlDocument, 4); + } + } catch(Exception e) { + logger.log(Level.SEVERE, "Error extending xmlMetadata: {0}", e.getMessage()); + } + return xmlMetadata; } @@ -726,3 +755,58 @@ public static String getStrFromList(List authors) { } } + +class DataCiteMetadataUtil { + + public static org.w3c.dom.Document parseXml(String xml) throws ParserConfigurationException, IOException, SAXException { + DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); + DocumentBuilder builder = factory.newDocumentBuilder(); + org.w3c.dom.Document document = builder.parse(new InputSource(new StringReader(xml))); + + return document; + } + + public static void appendElementToDocument(org.w3c.dom.Document document, String parentTagName, String tagName, String textContent) { + org.w3c.dom.Element element = document.createElement(tagName); + if(textContent != null && !textContent.isEmpty()) { + element.setTextContent(textContent); + } + org.w3c.dom.Element parentElement = (org.w3c.dom.Element) document.getElementsByTagName(parentTagName).item(0); + if(parentElement != null){ + 
parentElement.appendChild(element); + } + } + + public static String prettyPrintXML(org.w3c.dom.Document document, int indent) throws TransformerException { + TransformerFactory transformerFactory = TransformerFactory.newInstance(); + InputStream inputStream = DataCiteMetadataTemplate.class.getResourceAsStream("prettyprint.xsl"); + String prettyPrintXsl = Util.readAndClose(inputStream, "utf-8"); + Transformer transformer = transformerFactory.newTransformer(new StreamSource(new StringReader(prettyPrintXsl))); + transformer.setOutputProperty(OutputKeys.ENCODING, "UTF-8"); + transformer.setOutputProperty(OutputKeys.INDENT, "yes"); + transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", Integer.toString(indent)); + transformer.setOutputProperty(OutputKeys.STANDALONE, "no"); + + StringWriter stringWriter = new StringWriter(); + transformer.transform(new DOMSource(document), new StreamResult(stringWriter)); + return stringWriter.toString(); + } + + public static Optional readDatasetFieldValue(DvObject dvObject, String parentFieldName, String fieldName) { + if (dvObject.isInstanceofDataset()) { + Dataset dataset = (Dataset) dvObject; + for (DatasetField field : dataset.getLatestVersion().getDatasetFields()) { + if (field.getDatasetFieldType().getName().equals(parentFieldName)) { + for (DatasetFieldCompoundValue compoundValue : field.getDatasetFieldCompoundValues()) { + for (DatasetField child : compoundValue.getChildDatasetFields()) { + if (child.getDatasetFieldType().getName().equals(fieldName)) { + return Optional.of(child.getValue()); + } + } + } + } + } + } + return Optional.empty(); + } +} diff --git a/src/main/resources/edu/harvard/iq/dataverse/prettyprint.xsl b/src/main/resources/edu/harvard/iq/dataverse/prettyprint.xsl new file mode 100644 index 00000000000..3941269f403 --- /dev/null +++ b/src/main/resources/edu/harvard/iq/dataverse/prettyprint.xsl @@ -0,0 +1,11 @@ + + + + + + + + + + + \ No newline at end of file From 
bc48f0ca84b6291468b9718d74915b9e17adfc35 Mon Sep 17 00:00:00 2001 From: helkv Date: Mon, 4 Sep 2023 18:18:01 +0200 Subject: [PATCH 6/8] #61 Add the case: Multiple or empty 'Funding Reference' values --- .../dataverse/DOIDataCiteRegisterService.java | 92 +++++++++++++------ 1 file changed, 65 insertions(+), 27 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DOIDataCiteRegisterService.java b/src/main/java/edu/harvard/iq/dataverse/DOIDataCiteRegisterService.java index fc061945a8f..1c860971292 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DOIDataCiteRegisterService.java +++ b/src/main/java/edu/harvard/iq/dataverse/DOIDataCiteRegisterService.java @@ -567,27 +567,40 @@ public String generateXML(DvObject dvObject) { xmlMetadata = xmlMetadata.replace("{$contributors}", contributorsElement.toString()); - xmlMetadata = extendXmlMetadata(dvObject, xmlMetadata); + xmlMetadata = addGrantNumberMetadata(dvObject, xmlMetadata); return xmlMetadata; } - private String extendXmlMetadata(DvObject dvObject, String xmlMetadata) { + private String addGrantNumberMetadata(DvObject dvObject, String xmlMetadata) { try { - Optional grantAgency = DataCiteMetadataUtil.readDatasetFieldValue(dvObject, DatasetFieldConstant.grantNumber, DatasetFieldConstant.grantNumberAgency); - Optional grantNumber = DataCiteMetadataUtil.readDatasetFieldValue(dvObject, DatasetFieldConstant.grantNumber, DatasetFieldConstant.grantNumberValue); + Dataset dataset = (Dataset) dvObject; + List> grantNumberChildValues = new ArrayList<>(); + List grantNumberDatasetFields = DataCiteMetadataUtil.searchForFirstLevelDatasetFields(dataset, DatasetFieldConstant.grantNumber); + if(!grantNumberDatasetFields.isEmpty()){ + //There should only be one 'grantNumber' DatasetField + DatasetField datasetField = grantNumberDatasetFields.get(0); + grantNumberChildValues = DataCiteMetadataUtil.extractCompoundValueChildDatasetFieldValues(datasetField); + } - if(grantAgency.isPresent() || grantNumber.isPresent()) { + 
if(!grantNumberChildValues.isEmpty()) { org.w3c.dom.Document xmlDocument = DataCiteMetadataUtil.parseXml(xmlMetadata); DataCiteMetadataUtil.appendElementToDocument(xmlDocument, "resource", "fundingReferences", null); - DataCiteMetadataUtil.appendElementToDocument(xmlDocument, "fundingReferences", "fundingReference", null); - grantAgency.ifPresent(grantAgencyValue -> DataCiteMetadataUtil.appendElementToDocument(xmlDocument, "fundingReference", "funderName", grantAgencyValue)); - grantNumber.ifPresent(grantNumberValue -> DataCiteMetadataUtil.appendElementToDocument(xmlDocument, "fundingReference", "awardNumber", grantNumberValue)); - + for (Map childValue : grantNumberChildValues) { + if (!childValue.isEmpty()) { + DataCiteMetadataUtil.appendElementToDocument(xmlDocument, "fundingReferences", "fundingReference", null); + if(childValue.get(DatasetFieldConstant.grantNumberAgency) != null) { + DataCiteMetadataUtil.appendElementToDocument(xmlDocument, "fundingReference", "funderName", childValue.get(DatasetFieldConstant.grantNumberAgency)); + } + if(childValue.get(DatasetFieldConstant.grantNumberValue) != null) { + DataCiteMetadataUtil.appendElementToDocument(xmlDocument, "fundingReference", "awardNumber", childValue.get(DatasetFieldConstant.grantNumberValue)); + } + } + } xmlMetadata = DataCiteMetadataUtil.prettyPrintXML(xmlDocument, 4); } } catch(Exception e) { - logger.log(Level.SEVERE, "Error extending xmlMetadata: {0}", e.getMessage()); + logger.log(Level.SEVERE, "Error adding grantNumber to the DataCite Metadata: {0}", e.getMessage()); } return xmlMetadata; @@ -766,14 +779,23 @@ public static org.w3c.dom.Document parseXml(String xml) throws ParserConfigurati return document; } + /** + * Append Element to the last parent element. 
+ * + * @param document + * @param parentTagName + * @param tagName + * @param textContent + */ public static void appendElementToDocument(org.w3c.dom.Document document, String parentTagName, String tagName, String textContent) { org.w3c.dom.Element element = document.createElement(tagName); if(textContent != null && !textContent.isEmpty()) { element.setTextContent(textContent); } - org.w3c.dom.Element parentElement = (org.w3c.dom.Element) document.getElementsByTagName(parentTagName).item(0); - if(parentElement != null){ - parentElement.appendChild(element); + org.w3c.dom.NodeList parentElements = document.getElementsByTagName(parentTagName); + if(parentElements.getLength() > 0){ + org.w3c.dom.Element lastParentElement = (org.w3c.dom.Element) parentElements.item(parentElements.getLength() - 1); + lastParentElement.appendChild(element); } } @@ -792,21 +814,37 @@ public static String prettyPrintXML(org.w3c.dom.Document document, int indent) t return stringWriter.toString(); } - public static Optional readDatasetFieldValue(DvObject dvObject, String parentFieldName, String fieldName) { - if (dvObject.isInstanceofDataset()) { - Dataset dataset = (Dataset) dvObject; - for (DatasetField field : dataset.getLatestVersion().getDatasetFields()) { - if (field.getDatasetFieldType().getName().equals(parentFieldName)) { - for (DatasetFieldCompoundValue compoundValue : field.getDatasetFieldCompoundValues()) { - for (DatasetField child : compoundValue.getChildDatasetFields()) { - if (child.getDatasetFieldType().getName().equals(fieldName)) { - return Optional.of(child.getValue()); - } - } - } - } + /** + * Search for first-level DatasetFields by name. + * + * @param dataset + * @param datasetFieldName + * @return List of DatasetFields with the given name. 
+ */ + public static List searchForFirstLevelDatasetFields(Dataset dataset, String datasetFieldName) { + List datasetFields = new ArrayList<>(); + for (DatasetField datasetField : dataset.getLatestVersion().getDatasetFields()) { + if (datasetField.getDatasetFieldType().getName().equals(datasetFieldName)) { + datasetFields.add(datasetField); } } - return Optional.empty(); + return datasetFields; + } + + public static List> extractCompoundValueChildDatasetFieldValues(DatasetField datasetField){ + List> fieldValues = new ArrayList<>(); + for (DatasetFieldCompoundValue compoundValue : datasetField.getDatasetFieldCompoundValues()) { + fieldValues.add(DataCiteMetadataUtil.extractChildDatasetFieldValues(compoundValue)); + } + return fieldValues; + } + + public static Map extractChildDatasetFieldValues(DatasetFieldCompoundValue datasetFieldCompoundValue) { + Map datasetFieldValues = new HashMap<>(); + for (DatasetField childDatasetField : datasetFieldCompoundValue.getChildDatasetFields()) { + datasetFieldValues.put(childDatasetField.getDatasetFieldType().getName(), childDatasetField.getValue()); + } + return datasetFieldValues; } + } From 898389b1ab4d9338542a0e82272de05484191733 Mon Sep 17 00:00:00 2001 From: helkv Date: Mon, 11 Sep 2023 17:30:56 +0200 Subject: [PATCH 7/8] #61 Refactoring + Fix: funderName (grantNumberAgency) is required --- .../dataverse/DOIDataCiteRegisterService.java | 56 ++++++++++--------- 1 file changed, 31 insertions(+), 25 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DOIDataCiteRegisterService.java b/src/main/java/edu/harvard/iq/dataverse/DOIDataCiteRegisterService.java index 1c860971292..5b5786e79d1 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DOIDataCiteRegisterService.java +++ b/src/main/java/edu/harvard/iq/dataverse/DOIDataCiteRegisterService.java @@ -575,37 +575,43 @@ public String generateXML(DvObject dvObject) { private String addGrantNumberMetadata(DvObject dvObject, String xmlMetadata) { try { Dataset dataset = 
(Dataset) dvObject; - List> grantNumberChildValues = new ArrayList<>(); - List grantNumberDatasetFields = DataCiteMetadataUtil.searchForFirstLevelDatasetFields(dataset, DatasetFieldConstant.grantNumber); - if(!grantNumberDatasetFields.isEmpty()){ - //There should only be one 'grantNumber' DatasetField - DatasetField datasetField = grantNumberDatasetFields.get(0); - grantNumberChildValues = DataCiteMetadataUtil.extractCompoundValueChildDatasetFieldValues(datasetField); - } - - if(!grantNumberChildValues.isEmpty()) { - org.w3c.dom.Document xmlDocument = DataCiteMetadataUtil.parseXml(xmlMetadata); - DataCiteMetadataUtil.appendElementToDocument(xmlDocument, "resource", "fundingReferences", null); - for (Map childValue : grantNumberChildValues) { - if (!childValue.isEmpty()) { - DataCiteMetadataUtil.appendElementToDocument(xmlDocument, "fundingReferences", "fundingReference", null); - if(childValue.get(DatasetFieldConstant.grantNumberAgency) != null) { - DataCiteMetadataUtil.appendElementToDocument(xmlDocument, "fundingReference", "funderName", childValue.get(DatasetFieldConstant.grantNumberAgency)); - } - if(childValue.get(DatasetFieldConstant.grantNumberValue) != null) { - DataCiteMetadataUtil.appendElementToDocument(xmlDocument, "fundingReference", "awardNumber", childValue.get(DatasetFieldConstant.grantNumberValue)); - } - } - } - xmlMetadata = DataCiteMetadataUtil.prettyPrintXML(xmlDocument, 4); - } + List> grantNumberChildValues = extractGrantNumberValues(dataset); + org.w3c.dom.Document xmlDocument = DataCiteMetadataUtil.parseXml(xmlMetadata); + xmlDocument = addGrantNumberMetadata(grantNumberChildValues, xmlDocument); + xmlMetadata = DataCiteMetadataUtil.prettyPrintXML(xmlDocument, 4); } catch(Exception e) { logger.log(Level.SEVERE, "Error adding grantNumber to the DataCite Metadata: {0}", e.getMessage()); } - return xmlMetadata; } + public List> extractGrantNumberValues(Dataset dataset) { + List> grantNumberChildValues = new ArrayList<>(); + List 
grantNumberDatasetFields = DataCiteMetadataUtil.searchForFirstLevelDatasetFields(dataset, DatasetFieldConstant.grantNumber); + if(!grantNumberDatasetFields.isEmpty()){ + //There should only be one 'grantNumber' DatasetField + DatasetField datasetField = grantNumberDatasetFields.get(0); + grantNumberChildValues = DataCiteMetadataUtil.extractCompoundValueChildDatasetFieldValues(datasetField); + } + return grantNumberChildValues; + } + + public org.w3c.dom.Document addGrantNumberMetadata(List> grantNumberChildValues, org.w3c.dom.Document xmlDocument) { + for (Map childValue : grantNumberChildValues) { + if (childValue.containsKey(DatasetFieldConstant.grantNumberAgency)) { + if(xmlDocument.getElementsByTagName("fundingReferences").getLength() == 0){ + DataCiteMetadataUtil.appendElementToDocument(xmlDocument, "resource", "fundingReferences", null); + } + DataCiteMetadataUtil.appendElementToDocument(xmlDocument, "fundingReferences", "fundingReference", null); + DataCiteMetadataUtil.appendElementToDocument(xmlDocument, "fundingReference", "funderName", childValue.get(DatasetFieldConstant.grantNumberAgency)); + if (childValue.containsKey(DatasetFieldConstant.grantNumberValue)) { + DataCiteMetadataUtil.appendElementToDocument(xmlDocument, "fundingReference", "awardNumber", childValue.get(DatasetFieldConstant.grantNumberValue)); + } + } + } + return xmlDocument; + } + private String generateRelatedIdentifiers(DvObject dvObject) { StringBuilder sb = new StringBuilder(); From 3b67d7be1e544162c340c4b6de63df1c53b8139a Mon Sep 17 00:00:00 2001 From: helkv Date: Thu, 14 Sep 2023 17:10:08 +0200 Subject: [PATCH 8/8] #61 Small adaptions + Refactoring - Add check dvObject.isInstanceofDataset() - Refactoring --- .../dataverse/DOIDataCiteRegisterService.java | 45 +++++++++++++------ 1 file changed, 31 insertions(+), 14 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DOIDataCiteRegisterService.java b/src/main/java/edu/harvard/iq/dataverse/DOIDataCiteRegisterService.java 
index 5b5786e79d1..1d4387671f6 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DOIDataCiteRegisterService.java +++ b/src/main/java/edu/harvard/iq/dataverse/DOIDataCiteRegisterService.java @@ -567,37 +567,54 @@ public String generateXML(DvObject dvObject) { xmlMetadata = xmlMetadata.replace("{$contributors}", contributorsElement.toString()); - xmlMetadata = addGrantNumberMetadata(dvObject, xmlMetadata); + xmlMetadata = this.addFundingReferences(dvObject, xmlMetadata); return xmlMetadata; } - private String addGrantNumberMetadata(DvObject dvObject, String xmlMetadata) { + private String addFundingReferences(DvObject dvObject, String xmlMetadata) { try { - Dataset dataset = (Dataset) dvObject; - List> grantNumberChildValues = extractGrantNumberValues(dataset); - org.w3c.dom.Document xmlDocument = DataCiteMetadataUtil.parseXml(xmlMetadata); - xmlDocument = addGrantNumberMetadata(grantNumberChildValues, xmlDocument); - xmlMetadata = DataCiteMetadataUtil.prettyPrintXML(xmlDocument, 4); + if (dvObject.isInstanceofDataset()) { + Dataset dataset = (Dataset) dvObject; + List> grantNumberChildValues = this.extractGrantNumberValues(dataset); + if (!grantNumberChildValues.isEmpty()) { + org.w3c.dom.Document xmlDocument = DataCiteMetadataUtil.parseXml(xmlMetadata); + xmlDocument = this.appendFundingReferences(grantNumberChildValues, xmlDocument); + xmlMetadata = DataCiteMetadataUtil.prettyPrintXML(xmlDocument, 4); + } + } } catch(Exception e) { - logger.log(Level.SEVERE, "Error adding grantNumber to the DataCite Metadata: {0}", e.getMessage()); + logger.log(Level.SEVERE, "Error adding fundingReferences to the DataCite Metadata: {0}", e.getMessage()); } return xmlMetadata; } - public List> extractGrantNumberValues(Dataset dataset) { + private List> extractGrantNumberValues(Dataset dataset) { List> grantNumberChildValues = new ArrayList<>(); List grantNumberDatasetFields = DataCiteMetadataUtil.searchForFirstLevelDatasetFields(dataset, DatasetFieldConstant.grantNumber); + 
//There should only be one DatasetField with name 'grantNumber' (Premise: There are values for grantNumber) if(!grantNumberDatasetFields.isEmpty()){ - //There should only be one 'grantNumber' DatasetField - DatasetField datasetField = grantNumberDatasetFields.get(0); - grantNumberChildValues = DataCiteMetadataUtil.extractCompoundValueChildDatasetFieldValues(datasetField); + DatasetField grantNumber = grantNumberDatasetFields.get(0); + grantNumberChildValues = DataCiteMetadataUtil.extractCompoundValueChildDatasetFieldValues(grantNumber); } return grantNumberChildValues; } - public org.w3c.dom.Document addGrantNumberMetadata(List> grantNumberChildValues, org.w3c.dom.Document xmlDocument) { + /** + *
+     * Appends fundingReferences to the DataCite xml.
+     * Mappings:
+     * - grantNumberAgency -> funderName
+     * - grantNumberValue -> awardNumber
+     * 
+ * + * @param grantNumberChildValues + * @param xmlDocument + * @return The xmlDocument with fundingReferences + */ + private org.w3c.dom.Document appendFundingReferences(List> grantNumberChildValues, org.w3c.dom.Document xmlDocument) { for (Map childValue : grantNumberChildValues) { + // funderName (=grantNumberAgency) is a required subfield of fundingReference if (childValue.containsKey(DatasetFieldConstant.grantNumberAgency)) { if(xmlDocument.getElementsByTagName("fundingReferences").getLength() == 0){ DataCiteMetadataUtil.appendElementToDocument(xmlDocument, "resource", "fundingReferences", null); @@ -786,7 +803,7 @@ public static org.w3c.dom.Document parseXml(String xml) throws ParserConfigurati } /** - * Append Element to the last parent element. + * Append Element to the last parent element in order. * * @param document * @param parentTagName