Skip to content

Commit

Permalink
DD-1112: Resolve date problems (#100)
Browse files Browse the repository at this point in the history
* renames + extend short dates only in profile
* concatenated dates by AMD transformer
  • Loading branch information
jo-pol authored Sep 16, 2022
1 parent 7986d81 commit 4fda369
Show file tree
Hide file tree
Showing 7 changed files with 94 additions and 36 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,9 @@ class AmdTransformer(csvFile: File) {
def transform(xmlIn: Node, ddmCreated: NodeSeq): Try[Node] = {
if (ddmCreated.isEmpty)
return Failure(InvalidBagException("no date created found in DDM"))
val yearCreated = yearOf(ddmCreated.text)
val dateTimeCreated =new DateTime(ddmCreated.text)
val ddmCreatedString = ddmCreated.head.text
val yearCreated = yearOf(ddmCreatedString)
val dateTimeCreated =new DateTime(ddmCreatedString)
.toString(ISODateTimeFormat.dateTime())
val changedToPublished = (xmlIn \\ "stateChangeDate")
.filter(n => (n \ "toState").text.trim == "PUBLISHED")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ class EasyConvertBagToDepositApp(configuration: Configuration) extends DebugEnha
_ = if (bagDir.isHidden && (amdIn \\ "datasetState").text != "DELETED")
throw InvalidBagException(s"Inactive bag does not have state DELETED: $amdFile")
fromVault = depositProps.getString("deposit.origin") == "VAULT"
amdOut <- configuration.amdTransformer.transform(amdIn, ddmOut \\ "created")
amdOut <- configuration.amdTransformer.transform(amdIn, ddmOut \ "profile" \ "created")
agreementsFile = metadata / "depositor-info" / "agreements.xml"
_ = checkAgreementsXml((amdOut \ "depositorId").text, agreementsFile)
provenanceXml = provenance.collectChangesInXmls(Seq(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,11 +35,9 @@ class DdmTransformer(cfgDir: File, target: String, collectionsMap: Map[String, S
private val relationRewriteRule = RelationRewriteRule(cfgDir)
private val languageRewriteRule = LanguageRewriteRule(cfgDir / "languages.csv")

// produces additional content for DCMI
private lazy val archaeologyProfileRuleTransformer = new RuleTransformer(
private lazy val profileToNewForDcmiTransformer = new RuleTransformer(
acquisitionRewriteRule,
reportRewriteRule,
relationRewriteRule,
)

private def archaeologyDcmiRuleTransformer(newDcmiNodes: NodeSeq) = new RuleTransformer(
Expand All @@ -63,17 +61,14 @@ class DdmTransformer(cfgDir: File, target: String, collectionsMap: Map[String, S
DatesOfCollectionRewriteRule(newDcmiNodes),
languageRewriteRule,
DropFunderRoleRewriteRule,
ProfileDateRewriteRule,
ProfileRewriteRule,
)

private case class ArchaeologyRewriteRule(profileTitle: String, additionalDcmiNodes: NodeSeq) extends RewriteRule {
// defined locally to keep all creators of a RuleTransformer next to one another
override def transform(node: Node): Seq[Node] = {
node.label match {
case "profile" =>
<profile>
{ node.nonEmptyChildren.flatMap(ProfileDateRewriteRule) }
</profile>.copy(prefix = node.prefix, attributes = node.attributes, scope = node.scope)
case "profile" => ProfileRewriteRule(node)
case "dcmiMetadata" =>
<dcmiMetadata>
{ distinctTitles(profileTitle, archaeologyDcmiRuleTransformer(additionalDcmiNodes)(node).nonEmptyChildren) }
Expand Down Expand Up @@ -123,32 +118,29 @@ class DdmTransformer(cfgDir: File, target: String, collectionsMap: Map[String, S

def transform(ddmIn: Node, datasetId: String): Try[Node] = {
trace(datasetId)
val tmp = collectionsMap.mapValues(_.size).filter(_._2>1).keys.toList.sortBy(identity)
trace(tmp.mkString(","))
val collectionDates = datesOfCollection(ddmIn)
val newDcmiNodes = missingLicense(ddmIn) ++
collectionDates ++
datesOfCollection(ddmIn) ++
collectionsMap.get(datasetId).toSeq.flatten ++
unknownRightsHolder(ddmIn) ++
funders(ddmIn)

val profile = ddmIn \ "profile"
val originalProfile = ddmIn \ "profile"

if (target != "archaeology") {
val transformer = standardRuleTransformer(newDcmiNodes, (profile \ "title").text)
val transformer = standardRuleTransformer(newDcmiNodes, (originalProfile \ "title").text)
Success(transformer(ddmIn))
}
else {
// a title in the profile will not change but may produce something for dcmiMetadata
val transformedProfile = profile.flatMap(archaeologyProfileRuleTransformer)
val fromFirstTitle = transformedProfile.flatMap(_.nonEmptyChildren)
.diff(profile.flatMap(_.nonEmptyChildren))
val notConvertedFirstTitle = transformedProfile \ "title"
val possiblyNewForDcmi = originalProfile.flatMap(profileToNewForDcmiTransformer)
val newTitlesForDcmi = possiblyNewForDcmi.flatMap(_.nonEmptyChildren)
.diff(originalProfile.flatMap(_.nonEmptyChildren))
val notConvertedFirstTitle = possiblyNewForDcmi \ "title"

// the transformation
val ddmRuleTransformer = new RuleTransformer(ArchaeologyRewriteRule(
profileTitle = (profile \ "title").text,
additionalDcmiNodes = fromFirstTitle ++ newDcmiNodes
profileTitle = (originalProfile \ "title").text,
additionalDcmiNodes = newTitlesForDcmi ++ newDcmiNodes
))
val ddmOut = ddmRuleTransformer(ddmIn)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,16 +18,12 @@ package nl.knaw.dans.easy.bag2deposit.ddm
import scala.xml.transform.RewriteRule
import scala.xml.{ Elem, Node, Text }

object ProfileDateRewriteRule extends RewriteRule {
val DDM_NAMESPACE = "http://easy.dans.knaw.nl/schemas/md/ddm/"

override def transform(node: Node): Seq[Node] = node match {
case e: Elem if (e.label == "created" || e.label == "available") && e.namespace == DDM_NAMESPACE =>
if (e.text.trim.length == 7)
e.copy(child = Text(e.text.trim + "-01"))
else if (e.text.trim.length == 4)
e.copy(child = Text(e.text.trim + "-01-01"))
else e
case other => other
/**
 * Rewrite rule that rebuilds a `profile` element.
 *
 * Every non-empty child of the profile is passed through [[ShortDateRewriteRule]].
 * The replacement element is written as an XML literal without prefix, so the
 * prefix, attributes and scope of the original node are copied onto it afterwards.
 * A node with any other label is returned as it was received.
 */
object ProfileRewriteRule extends RewriteRule {
override def transform(node: Node): Seq[Node] = {
// only the label is examined here; the prefix of the original node is restored by the copy below
if (node.label == "profile")
<profile>
{ node.nonEmptyChildren.flatMap(ShortDateRewriteRule) }
</profile>.copy(prefix = node.prefix, attributes = node.attributes, scope = node.scope)
else node
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
/*
* Copyright (C) 2020 DANS - Data Archiving and Networked Services ([email protected])
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package nl.knaw.dans.easy.bag2deposit.ddm

import scala.xml.transform.RewriteRule
import scala.xml.{ Elem, Node, Text }

/**
 * Rewrite rule that pads abbreviated dates of `created` and `available` elements
 * to a full `YYYY-MM-DD` value.
 *
 *  - `YYYY-MM` becomes `YYYY-MM-01`
 *  - `YYYY` becomes `YYYY-01-01`
 *
 * The text of the element is trimmed before it is examined. Only values that
 * really have one of the two abbreviated shapes are padded; anything else
 * (a full date, or text that merely happens to be 4 or 7 characters long)
 * is returned unchanged, just like nodes with another label.
 * The namespace of the element is not examined.
 */
object ShortDateRewriteRule extends RewriteRule {
  // shapes of the abbreviated values; a Regex extractor matches the whole string
  private val YearOnly = """\d{4}""".r
  private val YearMonth = """\d{4}-\d{2}""".r

  /**
   * @param node the node to examine
   * @return the element with a padded date as its only child when it is a
   *         `created`/`available` element with an abbreviated date,
   *         otherwise the node itself
   */
  override def transform(node: Node): Seq[Node] = node match {
    case e: Elem if e.label == "created" || e.label == "available" =>
      e.text.trim match {
        case yearMonth @ YearMonth() => e.copy(child = Text(s"$yearMonth-01"))
        case year @ YearOnly() => e.copy(child = Text(s"$year-01-01"))
        case _ => e
      }
    case other => other
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ trait DdmSupport {
</dcx-dai:organization>
</dcx-dai:creatorDetails>
val created: Elem = <ddm:created>2013-03-01</ddm:created>
val available: Elem = <ddm:available>2013-04</ddm:available>
val available: Elem = <ddm:available>2013-04-01</ddm:available>
val archaeology: Elem = <ddm:audience>D37000</ddm:audience>
val openAccess: Elem = <ddm:accessRights>OPEN_ACCESS</ddm:accessRights>
val schemaLocations: String =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ import org.scalatest.matchers.should.Matchers
import java.nio.charset.Charset
import java.util.UUID
import scala.util.{ Failure, Success, Try }
import scala.xml.{ Utility, XML }
import scala.xml.{ NodeBuffer, Utility, XML }

class RewriteSpec extends AnyFlatSpec with XmlSupport with SchemaSupport with Matchers with DdmSupport with FileSystemSupport with AppConfigSupport {
private val cfgDir = File("src/main/assembly/dist/cfg")
Expand Down Expand Up @@ -248,6 +248,42 @@ class RewriteSpec extends AnyFlatSpec with XmlSupport with SchemaSupport with Ma
validate(expectedDDM) shouldBe Success(())
}

"dates" should "should only extend in the profile in both transformers" in {
def profile (dates: NodeBuffer) = <dc:title>RAMA 10</dc:title><dct:description/> +: creator +: dates +: <ddm:audience>D11200</ddm:audience> +: openAccess
val inputProfile = profile(<ddm:created>2013</ddm:created><ddm:available>2017-03</ddm:available>)
val expectedProfile = profile(<ddm:created>2013-01-01</ddm:created><ddm:available>2017-03-01</ddm:available>)
val inputDDM = ddm(
<ddm:profile>{ inputProfile }</ddm:profile>
<ddm:dcmiMetadata>
<ddm:created>2012</ddm:created>
</ddm:dcmiMetadata>
)
val expectedStandardDDM = ddm(
<ddm:profile>{ expectedProfile }</ddm:profile>
<ddm:dcmiMetadata>
<ddm:created>2012</ddm:created>
<dcterms:rightsHolder>Unknown</dcterms:rightsHolder>
</ddm:dcmiMetadata>
)
val expectedArchaeologyDDM = ddm(
<ddm:profile>{ expectedProfile }</ddm:profile>
<ddm:dcmiMetadata>
<ddm:created>2012</ddm:created>
<ddm:reportNumber schemeURI="https://data.cultureelerfgoed.nl/term/id/abr/7a99aaba-c1e7-49a4-9dd8-d295dbcc870e"
valueURI="https://data.cultureelerfgoed.nl/term/id/abr/05c754af-7944-4971-8280-9e1b4e474a8d"
subjectScheme="ABR Rapporten" reportNo="10">
RAMA 10
</ddm:reportNumber>
<dcterms:rightsHolder>Unknown</dcterms:rightsHolder>
</ddm:dcmiMetadata>
)

testConfig("SSH").ddmTransformer.transform(inputDDM, "eas-dataset:123").map(normalized)
.getOrElse(fail("no DDM returned")) shouldBe normalized(expectedStandardDDM)
testConfig("archaeology").ddmTransformer.transform(inputDDM, "eas-dataset:123").map(normalized)
.getOrElse(fail("no DDM returned")) shouldBe normalized(expectedArchaeologyDDM)
}

"datesOfCollection" should "convert a proper dates pair" in {
val ddmIn = ddm(title = "datesOfCollection test", audience = "D37000", dcmi =
<ddm:dcmiMetadata>
Expand Down

0 comments on commit 4fda369

Please sign in to comment.