Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Extend the metadata information sent to DataCite, by adding fundingReferences #73

Merged
merged 8 commits into from
Sep 19, 2023
163 changes: 154 additions & 9 deletions src/main/java/edu/harvard/iq/dataverse/DOIDataCiteRegisterService.java
Original file line number Diff line number Diff line change
Expand Up @@ -7,29 +7,34 @@

import edu.harvard.iq.dataverse.branding.BrandingUtil;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.io.*;
import java.util.*;
import java.util.logging.Level;
import java.util.logging.Logger;
import javax.ejb.EJB;
import javax.ejb.Stateless;
import javax.persistence.EntityManager;
import javax.persistence.PersistenceContext;
import javax.persistence.TypedQuery;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import javax.xml.transform.stream.StreamSource;

import edu.harvard.iq.dataverse.settings.JvmSettings;
import org.apache.commons.text.StringEscapeUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;

/**
*
Expand Down Expand Up @@ -561,9 +566,69 @@ public String generateXML(DvObject dvObject) {
xmlMetadata = xmlMetadata.replace("${relatedIdentifiers}", relIdentifiers);

xmlMetadata = xmlMetadata.replace("{$contributors}", contributorsElement.toString());

xmlMetadata = this.addFundingReferences(dvObject, xmlMetadata);

return xmlMetadata;
}

/**
 * Enriches the generated DataCite XML with {@code fundingReferences} taken from the
 * dataset's grant number metadata.
 *
 * <p>This step is best-effort: if anything goes wrong while parsing, extending or
 * serializing the XML, the failure is logged and the XML passed in is returned
 * unchanged.</p>
 *
 * @param dvObject    the object the metadata is generated for; only datasets are processed
 * @param xmlMetadata the DataCite XML generated so far
 * @return the XML with fundingReferences added, or {@code xmlMetadata} unchanged when the
 *         object is not a dataset, no grant number values were found, or an error occurred
 */
private String addFundingReferences(DvObject dvObject, String xmlMetadata) {
    try {
        if (dvObject.isInstanceofDataset()) {
            Dataset dataset = (Dataset) dvObject;
            List<Map<String, String>> grantNumberChildValues = this.extractGrantNumberValues(dataset);
            if (!grantNumberChildValues.isEmpty()) {
                org.w3c.dom.Document xmlDocument = DataCiteMetadataUtil.parseXml(xmlMetadata);
                xmlDocument = this.appendFundingReferences(grantNumberChildValues, xmlDocument);
                // Only overwrite the input once serialization succeeded, so that a failure
                // anywhere above leaves the original XML intact.
                xmlMetadata = DataCiteMetadataUtil.prettyPrintXML(xmlDocument, 4);
            }
        }
    } catch (Exception e) {
        // Deliberately broad: funding references are optional and must never prevent the
        // rest of the metadata from being returned. Pass the Throwable itself so the stack
        // trace and cause chain end up in the log instead of just the (possibly null) message.
        logger.log(Level.SEVERE, "Error adding fundingReferences to the DataCite Metadata: " + e.getMessage(), e);
    }
    return xmlMetadata;
}

/**
 * Collects the child field values of every compound value stored in the dataset's
 * {@code grantNumber} field.
 *
 * @param dataset the dataset whose grant number metadata is read
 * @return one map (child field name -> value) per grant number entry; an empty list
 *         when the dataset has no grantNumber field
 */
private List<Map<String, String>> extractGrantNumberValues(Dataset dataset) {
    List<DatasetField> matchingFields =
            DataCiteMetadataUtil.searchForFirstLevelDatasetFields(dataset, DatasetFieldConstant.grantNumber);

    if (matchingFields.isEmpty()) {
        return new ArrayList<>();
    }

    // A dataset is expected to carry at most one 'grantNumber' field, so only the
    // first match is evaluated.
    DatasetField grantNumberField = matchingFields.get(0);
    return DataCiteMetadataUtil.extractCompoundValueChildDatasetFieldValues(grantNumberField);
}

/**
 * <pre>
 * Appends fundingReferences to the DataCite xml.
 * Mappings:
 * - grantNumberAgency -> funderName
 * - grantNumberValue -> awardNumber
 * </pre>
 *
 * Entries without a usable (non-null, non-blank) grantNumberAgency are skipped, because
 * funderName is a required subfield of fundingReference. A blank grantNumberValue is
 * omitted instead of being emitted as an empty awardNumber element.
 *
 * @param grantNumberChildValues one map (child field name -> value) per grant number entry
 * @param xmlDocument the parsed DataCite document; it is modified in place
 * @return The xmlDocument with fundingReferences
 */
private org.w3c.dom.Document appendFundingReferences(List<Map<String, String>> grantNumberChildValues, org.w3c.dom.Document xmlDocument) {
    for (Map<String, String> childValue : grantNumberChildValues) {
        String funderName = childValue.get(DatasetFieldConstant.grantNumberAgency);
        // A key that is present but mapped to null/blank would otherwise yield an empty
        // <funderName/> for a required subfield, so treat it like a missing key.
        if (funderName == null || funderName.trim().isEmpty()) {
            continue;
        }

        // Create the <fundingReferences> container lazily, the first time it is needed.
        if (xmlDocument.getElementsByTagName("fundingReferences").getLength() == 0) {
            DataCiteMetadataUtil.appendElementToDocument(xmlDocument, "resource", "fundingReferences", null);
        }

        // The helper appends to the LAST element with the given parent tag name, so the
        // funderName/awardNumber below land in the fundingReference created here.
        DataCiteMetadataUtil.appendElementToDocument(xmlDocument, "fundingReferences", "fundingReference", null);
        DataCiteMetadataUtil.appendElementToDocument(xmlDocument, "fundingReference", "funderName", funderName);

        String awardNumber = childValue.get(DatasetFieldConstant.grantNumberValue);
        if (awardNumber != null && !awardNumber.trim().isEmpty()) {
            DataCiteMetadataUtil.appendElementToDocument(xmlDocument, "fundingReference", "awardNumber", awardNumber);
        }
    }
    return xmlDocument;
}

private String generateRelatedIdentifiers(DvObject dvObject) {

StringBuilder sb = new StringBuilder();
Expand Down Expand Up @@ -726,3 +791,83 @@ public static String getStrFromList(List<String> authors) {
}

}

/**
 * Static helpers for post-processing the generated DataCite XML (parsing, appending
 * elements, serializing) and for reading values out of a dataset's metadata fields.
 */
class DataCiteMetadataUtil {

    /**
     * Parses an XML string into a DOM document.
     *
     * <p>The parser is hardened against XML External Entity (XXE) attacks because the XML
     * being parsed is assembled from user-entered metadata: secure processing is enabled,
     * DOCTYPE declarations are rejected and entity references are not expanded.
     * NOTE(review): this assumes the DataCite metadata never contains a DOCTYPE
     * declaration - confirm against the metadata template.</p>
     *
     * @param xml the XML to parse
     * @return the parsed document
     * @throws ParserConfigurationException if the parser cannot be configured or created
     * @throws IOException if reading the input fails
     * @throws SAXException if the XML is not well-formed or contains a DOCTYPE declaration
     */
    public static org.w3c.dom.Document parseXml(String xml) throws ParserConfigurationException, IOException, SAXException {
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        factory.setFeature(javax.xml.XMLConstants.FEATURE_SECURE_PROCESSING, true);
        // Rejecting DOCTYPE declarations outright rules out external entities and
        // entity-expansion attacks in one go.
        factory.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
        factory.setExpandEntityReferences(false);

        DocumentBuilder builder = factory.newDocumentBuilder();
        return builder.parse(new InputSource(new StringReader(xml)));
    }

    /**
     * Append Element to the last parent element in order.
     *
     * <p>If the document contains no element named {@code parentTagName}, nothing is
     * appended and the document is left unchanged.</p>
     *
     * @param document the document to modify in place
     * @param parentTagName tag name of the parent; the last match in document order is used
     * @param tagName tag name of the element to create
     * @param textContent text of the new element; {@code null} or empty creates an empty element
     */
    public static void appendElementToDocument(org.w3c.dom.Document document, String parentTagName, String tagName, String textContent) {
        org.w3c.dom.Element element = document.createElement(tagName);
        if (textContent != null && !textContent.isEmpty()) {
            element.setTextContent(textContent);
        }
        org.w3c.dom.NodeList parentElements = document.getElementsByTagName(parentTagName);
        if (parentElements.getLength() > 0) {
            org.w3c.dom.Element lastParentElement = (org.w3c.dom.Element) parentElements.item(parentElements.getLength() - 1);
            lastParentElement.appendChild(element);
        }
    }

    /**
     * Serializes a DOM document to an indented UTF-8 XML string by running it through the
     * {@code prettyprint.xsl} stylesheet, which is loaded as a classpath resource relative
     * to {@code DataCiteMetadataTemplate}.
     *
     * @param document the document to serialize
     * @param indent number of spaces per indentation level
     * @return the serialized XML
     * @throws TransformerException if the stylesheet cannot be compiled or the transformation fails
     */
    public static String prettyPrintXML(org.w3c.dom.Document document, int indent) throws TransformerException {
        TransformerFactory transformerFactory = TransformerFactory.newInstance();
        InputStream inputStream = DataCiteMetadataTemplate.class.getResourceAsStream("prettyprint.xsl");
        // Util.readAndClose is defined elsewhere; presumably it reads the whole stream
        // into a String using the given charset and closes it - verify.
        String prettyPrintXsl = Util.readAndClose(inputStream, "utf-8");
        Transformer transformer = transformerFactory.newTransformer(new StreamSource(new StringReader(prettyPrintXsl)));
        transformer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
        transformer.setOutputProperty(OutputKeys.INDENT, "yes");
        // Implementation-specific output property that carries the indentation width.
        transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", Integer.toString(indent));
        transformer.setOutputProperty(OutputKeys.STANDALONE, "no");

        StringWriter stringWriter = new StringWriter();
        transformer.transform(new DOMSource(document), new StreamResult(stringWriter));
        return stringWriter.toString();
    }

    /**
     * Search for first-level DatasetFields by name.
     *
     * <p>Only the fields returned by {@code getDatasetFields()} of the dataset's latest
     * version are inspected; child fields of compound values are not searched.</p>
     *
     * @param dataset the dataset whose latest version is searched
     * @param datasetFieldName the field type name to match exactly
     * @return List of DatasetFields with the given name; empty when none match.
     */
    public static List<DatasetField> searchForFirstLevelDatasetFields(Dataset dataset, String datasetFieldName) {
        List<DatasetField> datasetFields = new ArrayList<>();
        for (DatasetField datasetField : dataset.getLatestVersion().getDatasetFields()) {
            if (datasetField.getDatasetFieldType().getName().equals(datasetFieldName)) {
                datasetFields.add(datasetField);
            }
        }
        return datasetFields;
    }

    /**
     * Extracts the child field values of every compound value of a dataset field.
     *
     * @param datasetField the compound dataset field to read
     * @return one map (child field name -> value) per compound value, in iteration order
     */
    public static List<Map<String, String>> extractCompoundValueChildDatasetFieldValues(DatasetField datasetField) {
        List<Map<String, String>> fieldValues = new ArrayList<>();
        for (DatasetFieldCompoundValue compoundValue : datasetField.getDatasetFieldCompoundValues()) {
            fieldValues.add(DataCiteMetadataUtil.extractChildDatasetFieldValues(compoundValue));
        }
        return fieldValues;
    }

    /**
     * Maps each child field of a compound value from its field type name to its value.
     *
     * <p>If two child fields share the same type name, the later one overwrites the
     * earlier one. Whatever {@code getValue()} returns is stored as-is, so callers should
     * not rely on the mapped values being non-null.</p>
     *
     * @param datasetFieldCompoundValue the compound value whose children are read
     * @return map of child field type name to child field value
     */
    public static Map<String, String> extractChildDatasetFieldValues(DatasetFieldCompoundValue datasetFieldCompoundValue) {
        Map<String, String> datasetFieldValues = new HashMap<>();
        for (DatasetField childDatasetField : datasetFieldCompoundValue.getChildDatasetFields()) {
            datasetFieldValues.put(childDatasetField.getDatasetFieldType().getName(), childDatasetField.getValue());
        }
        return datasetFieldValues;
    }

}
11 changes: 11 additions & 0 deletions src/main/resources/edu/harvard/iq/dataverse/prettyprint.xsl
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
<!-- Identity stylesheet used when re-serializing the DataCite XML: it copies the input
     unchanged apart from the whitespace stripping configured below. -->
<!-- Remove whitespace-only text nodes from all source elements, so that indentation
     already present in the input does not end up in the output alongside any
     indentation the serializer adds. -->
<xsl:strip-space elements="*"/>
<xsl:output method="xml" encoding="UTF-8"/>

<!-- Identity template: copy every attribute and node, then recurse into its
     attributes and children. -->
<xsl:template match="@*|node()">
<xsl:copy>
<xsl:apply-templates select="@*|node()"/>
</xsl:copy>
</xsl:template>

</xsl:stylesheet>