Skip to content

Commit

Permalink
DD-443: Associate datasets with the chosen depositor-account (#39)
Browse files Browse the repository at this point in the history
* replace user in amd.xml
* provenance for replaced user
* group DDM related actions
* account-substitutes.csv optional
* speed up --help
  • Loading branch information
jo-pol authored May 20, 2021
1 parent 209f598 commit 23f0eb9
Show file tree
Hide file tree
Showing 26 changed files with 588 additions and 110 deletions.
2 changes: 2 additions & 0 deletions src/main/assembly/dist/cfg/account-substitutes.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
removed-account, chosen-account
user001,USer
8 changes: 6 additions & 2 deletions src/main/scala/nl.knaw.dans.easy.bag2deposit/BagFacade.scala
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,11 @@ object BagFacade {
case cause: Exception => Failure(InvalidBagException(s"$bagDir, $cause"))
}

def updateMetadata(bag: Bag): Try[Unit] = Try {
MetadataWriter.writeBagMetadata(bag.getMetadata, bag.getVersion, bag.getRootDir, bag.getFileEncoding)
def updateMetadata(bag: Bag): Try[Unit] = {
trace(bag.getRootDir)
Try {
MetadataWriter.writeBagMetadata(bag.getMetadata, bag.getVersion, bag.getRootDir, bag.getFileEncoding)
}
}

private val includeHiddenFiles = true
Expand All @@ -60,6 +63,7 @@ object BagFacade {
* @return
*/
def updatePayloadManifests(bag: Bag, payloadEntries: Path): Try[Unit] = Try {
trace(bag.getRootDir)
if (!payloadEntries.toString.startsWith("data/")) {
throw new IllegalArgumentException(s"path must start with data, found $payloadEntries")
}
Expand Down
34 changes: 32 additions & 2 deletions src/main/scala/nl.knaw.dans.easy.bag2deposit/Command.scala
Original file line number Diff line number Diff line change
Expand Up @@ -16,21 +16,51 @@
package nl.knaw.dans.easy.bag2deposit

import better.files.File
import better.files.File.root
import nl.knaw.dans.easy.bag2deposit.collections.Collections.getCollectionsMap
import nl.knaw.dans.easy.bag2deposit.collections.FedoraProvider
import nl.knaw.dans.easy.bag2deposit.ddm.DdmTransformer
import nl.knaw.dans.lib.logging.DebugEnhancedLogging
import org.apache.commons.configuration.PropertiesConfiguration

import java.net.URI
import scala.language.reflectiveCalls

object Command extends App with DebugEnhancedLogging {
type FeedBackMessage = String
private val home = File(System.getProperty("app.home"))
val cfgPath = Seq(
root / "etc" / "opt" / "dans.knaw.nl" / "easy-convert-bag-to-deposit",
home / "cfg")
.find(_.exists)
.getOrElse { throw new IllegalStateException("No configuration directory found") }
val properties = {
new PropertiesConfiguration() {
setDelimiterParsingDisabled(true)
load((cfgPath / "application.properties").toJava)
}
}
val version = (home / "bin" / "version").contentAsString.stripLineEnd
val agent = properties.getString("http.agent", s"easy-convert-bag-to-deposit/$version")
logger.info(s"setting http.agent to $agent")
System.setProperty("http.agent", agent)

val configuration = Configuration(File(System.getProperty("app.home")))
val commandLine: CommandLineOptions = new CommandLineOptions(args, configuration) {
val commandLine: CommandLineOptions = new CommandLineOptions(args, version) {
verify()
}
private val bagParentDirs = commandLine.bagParentDir.map(Iterator(_))
.getOrElse(commandLine.bagGrandParentDir.map(_.children)
.getOrElse(Iterator.empty))


val configuration = Configuration(
version,
dansDoiPrefixes = properties.getStringArray("dans-doi.prefixes"),
dataverseIdAuthority = properties.getString("dataverse.id-authority"),
bagIndex = BagIndex(new URI(properties.getString("bag-index.url"))),
ddmTransformer = new DdmTransformer(cfgPath, getCollectionsMap(cfgPath, FedoraProvider(properties))),
userTransformer = new UserTransformer(cfgPath)
)
private val propertiesFactory = DepositPropertiesFactory(
configuration,
commandLine.idType(),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,18 +22,18 @@ import org.rogach.scallop.{ ScallopConf, ScallopOption, ValueConverter, singleAr

import java.nio.file.Path

class CommandLineOptions(args: Array[String], configuration: Configuration) extends ScallopConf(args) {
class CommandLineOptions(args: Array[String], version: String) extends ScallopConf(args) {
appendDefaultToDescription = true
editBuilder(_.setHelpWidth(110))
printedName = "easy-convert-bag-to-deposit"
version(configuration.version)
version(version)
val description: String = s"""Add deposit.properties to directorie(s) with a bag"""
val synopsis: String =
s"""
| $printedName { --dir | --uuid } <directory> -t { URN | DOI } -s { FEDORA | VAULT } [ -o <output-dir> ]
|""".stripMargin

version(s"$printedName v${ configuration.version }")
version(s"$printedName v$version")
banner(
s"""
| $description
Expand Down
28 changes: 1 addition & 27 deletions src/main/scala/nl.knaw.dans.easy.bag2deposit/Configuration.scala
Original file line number Diff line number Diff line change
Expand Up @@ -30,31 +30,5 @@ case class Configuration(version: String,
dataverseIdAuthority: String,
bagIndex: BagIndex,
ddmTransformer: DdmTransformer,
userTransformer: UserTransformer,
)

object Configuration extends DebugEnhancedLogging {

def apply(home: File): Configuration = {
val cfgPath = Seq(
root / "etc" / "opt" / "dans.knaw.nl" / "easy-convert-bag-to-deposit",
home / "cfg")
.find(_.exists)
.getOrElse { throw new IllegalStateException("No configuration directory found") }
val properties = new PropertiesConfiguration() {
setDelimiterParsingDisabled(true)
load((cfgPath / "application.properties").toJava)
}
val version = (home / "bin" / "version").contentAsString.stripLineEnd
val agent = properties.getString("http.agent", s"easy-convert-bag-to-deposit/$version")
logger.info(s"setting http.agent to $agent")
System.setProperty("http.agent", agent)

new Configuration(
version,
dansDoiPrefixes = properties.getStringArray("dans-doi.prefixes"),
dataverseIdAuthority = properties.getString("dataverse.id-authority"),
bagIndex = BagIndex(new URI(properties.getString("bag-index.url"))),
ddmTransformer = new DdmTransformer(cfgPath, getCollectionsMap(cfgPath, FedoraProvider(properties))),
)
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ case class DepositPropertiesFactory(configuration: Configuration, idType: IdType
private val dataverseIdAuthority = configuration.dataverseIdAuthority

def create(bagInfo: BagInfo, ddm: Node): Try[PropertiesConfiguration] = Try {
trace(this.getClass)
val ddmIds: NodeSeq = ddm \ "dcmiMetadata" \ "identifier"

def formatOfPanId = (ddm \ "dcmiMetadata" \ "isFormatOf")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,15 @@ import better.files.File
import better.files.File.CopyOptions
import nl.knaw.dans.easy.bag2deposit.Command.FeedBackMessage
import nl.knaw.dans.easy.bag2deposit.ddm.Provenance
import nl.knaw.dans.easy.bag2deposit.ddm.Provenance.compare
import nl.knaw.dans.lib.logging.DebugEnhancedLogging

import java.io.{ FileNotFoundException, IOException }
import java.nio.file.Paths
import java.nio.charset.Charset
import scala.collection.mutable.ListBuffer
import scala.util.{ Failure, Success, Try }
import scala.xml.{ Elem, NodeSeq }
import scala.xml.NodeSeq

class EasyConvertBagToDepositApp(configuration: Configuration) extends DebugEnhancedLogging {

Expand All @@ -46,6 +48,7 @@ class EasyConvertBagToDepositApp(configuration: Configuration) extends DebugEnha
).toMap

def registerMatchedReports(urn: String, reports: NodeSeq): Unit = {
trace(urn)
reports.foreach { node =>
val reportUuid = (node \@ "valueURI").replaceAll(".*/", "")
Try(reportMatches(reportUuid) += s"\t$urn\t${ node.text }")
Expand All @@ -69,6 +72,7 @@ class EasyConvertBagToDepositApp(configuration: Configuration) extends DebugEnha
app = getClass.getSimpleName,
version = configuration.version
)
implicit val charset: Charset = Charset.forName("UTF-8")

private def addProps(depositPropertiesFactory: DepositPropertiesFactory, maybeOutputDir: Option[File])
(bagParentDir: File): Try[Boolean] = {
Expand All @@ -93,15 +97,26 @@ class EasyConvertBagToDepositApp(configuration: Configuration) extends DebugEnha
props <- depositPropertiesFactory.create(bagInfo, ddmIn)
datasetId = props.getString("identifier.fedora", "")
ddmOut <- configuration.ddmTransformer.transform(ddmIn, datasetId)
_ = provenance.xml(ddmIn, ddmOut).foreach(writeProvenance(bagDir))
_ = registerMatchedReports(datasetId, ddmOut \\ "reportNumber")
_ = props.save((bagParentDir / "deposit.properties").toJava)
_ = ddmFile.writeText(ddmOut.serialize)
oldDcmi = (ddmIn \ "dcmiMetadata").headOption.getOrElse(<dcmiMetadata/>)
newDcmi = (ddmOut \ "dcmiMetadata").headOption.getOrElse(<dcmiMetadata/>)
amdChanges <- configuration.userTransformer.transform(metadata / "amd.xml")
_ = provenance.collectChangesInXmls(Map(
"http://easy.dans.knaw.nl/easy/dataset-administrative-metadata/" -> amdChanges,
"http://easy.dans.knaw.nl/schemas/md/ddm/" -> compare(oldDcmi, newDcmi),
)).foreach(xml => (metadata / "provenance.xml").writeText(xml.serialize))
migrationDir = (bagDir / "data" / "easy-migration").createDirectories()
_ = migrationFiles.foreach(name => (metadata / name).copyTo(migrationDir / name))
_ = bagInfoKeysToRemove.foreach(mutableBagMetadata.remove)
_ = trace("updating metadata")
_ <- BagFacade.updateMetadata(bag)
_ = trace("updating payload manifest")
_ <- BagFacade.updatePayloadManifests(bag, Paths.get("data/easy-migration"))
_ = trace("updating tag manifest")
_ <- BagFacade.updateTagManifests(bag, changedMetadata)
_ = trace("writing manifests")
_ <- BagFacade.writeManifests(bag)
_ = maybeOutputDir.foreach(move(bagParentDir))
_ = logger.info(s"OK $datasetId ${ bagParentDir.name }/${ bagDir.name }")
Expand All @@ -118,11 +133,6 @@ class EasyConvertBagToDepositApp(configuration: Configuration) extends DebugEnha
Failure(e)
}

private def writeProvenance(bagDir: File)(xml: Elem) = {
trace(bagDir)
(bagDir / "metadata" / "provenance.xml").writeText(xml.serialize)
}

private def move(bagParentDir: File)(outputDir: File) = {
trace(bagParentDir, outputDir)
val target = outputDir / bagParentDir.name
Expand Down
61 changes: 61 additions & 0 deletions src/main/scala/nl.knaw.dans.easy.bag2deposit/UserTransformer.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
/**
* Copyright (C) 2020 DANS - Data Archiving and Networked Services ([email protected])
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package nl.knaw.dans.easy.bag2deposit

import better.files.File
import nl.knaw.dans.easy.bag2deposit.ddm.Provenance
import org.apache.commons.csv.CSVFormat.RFC4180

import java.nio.charset.Charset
import scala.util.Try
import scala.xml.Node
import scala.xml.transform.{ RewriteRule, RuleTransformer }

/**
 * Replaces account ids in administrative metadata (amd.xml) as configured
 * in the optional file `account-substitutes.csv`.
 *
 * Each record of the CSV maps an account to replace (first column)
 * to the account to use instead (second column).
 * When the file does not exist or is empty, nothing is replaced.
 *
 * @param cfgDir the directory that may contain `account-substitutes.csv`
 */
class UserTransformer(cfgDir: File) {
  private val csvFile: File = cfgDir / "account-substitutes.csv"

  // Labels of the elements whose text is an account id that may need substitution.
  private val userLabels: Set[String] = Set("depositorId", "signerId")

  // Values are trimmed: padding around the separator (as in the header line of the
  // distributed example file) would otherwise produce keys that never match an id
  // and replacement ids with stray white space.
  // NOTE(review): presumably parseCsv skips the first `nrOfHeaderLines` lines — confirm in the package object.
  private val userMap: Map[String, String] = if (!csvFile.exists || csvFile.isEmpty)
    Map[String, String]()
  else parseCsv(
    csvFile,
    nrOfHeaderLines = 1,
    format = RFC4180.withHeader("old", "new"),
  ).map(r => r.get("old").trim -> r.get("new").trim).toMap

  private val userRewriteRule: RewriteRule = new RewriteRule {
    /**
     * Replaces the content of a `depositorId` or `signerId` element when its text
     * is a configured account; any other node is returned as is.
     */
    override def transform(node: Node): Seq[Node] = {
      if (!userLabels.contains(node.label)) node
      else userMap
        // the replacement is a fresh element that only takes over the label of the original,
        // attributes of the original element (if any) are not carried over
        .get(node.text).map(id => <id>{ id }</id>.copy(label = node.label))
        .getOrElse(node)
    }
  }
  private val transformer = new RuleTransformer(userRewriteRule)

  // The default charset is determined during virtual-machine startup and typically
  // depends upon the locale and charset of the underlying operating system.
  implicit val charset: Charset = Charset.forName("UTF-8")

  /**
   * Applies the account substitutions to an XML file.
   * The file is rewritten in place, also when no account was replaced.
   *
   * @param file the XML file to transform, in practice `metadata/amd.xml` of a bag
   * @return the differences between the original and the transformed content as
   *         calculated by `Provenance.compare`; a `Failure` when the file could not
   *         be loaded, transformed or written
   */
  def transform(file: File): Try[Seq[Node]] = {
    for {
      xmlIn <- loadXml(file)
      // A single root should result in a single root; anything else is a bug in the
      // rewrite rule. The exception is captured by the enclosing Try.
      xmlOut = transformer.transform(xmlIn) match {
        case Seq(root) => root
        case roots => throw new Exception(s"programming error: UserTransformer returned ${ roots.size } root elements")
      }
      _ = file.writeText(xmlOut.serialize)
      diff = Provenance.compare(xmlIn, xmlOut)
      _ = trace(diff.map(_.serialize))
    } yield diff
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ object Collections extends DebugEnhancedLogging {
private val resolver: Resolver = Resolver()

private def parseCsv(file: File, format: CSVFormat): Try[Iterable[CSVRecord]] = {
trace(file)
managed(CSVParser.parse(
file.toJava,
Charset.forName("UTF-8"),
Expand All @@ -63,7 +64,7 @@ object Collections extends DebugEnhancedLogging {
}

def getCollectionsMap(cfgPath: File, maybeFedoraProvider: Option[FedoraProvider]): Map[String, Elem] = {
trace()
trace(())
val result: Map[String, Elem] = maybeFedoraProvider
.map { provider =>
memberDatasetIdToInCollection(collectionDatasetIdToInCollection(cfgPath), provider)
Expand Down Expand Up @@ -92,8 +93,6 @@ object Collections extends DebugEnhancedLogging {
)
}

logger.info(s"building collections from $cfgDir")

parseCsv(cfgDir / "ThemathischeCollecties.csv", collectionCsvFormat)
.unsafeGetOrThrow
.map(parseCollectionRecord)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,8 +63,9 @@ case class FedoraProviderException(query: String, cause: Throwable) extends Exce

object FedoraProvider extends DebugEnhancedLogging {
def apply(properties: PropertiesConfiguration): Option[FedoraProvider] = {
trace()
Option(properties.getString("fcrepo.url"))
val repo = properties.getString("fcrepo.url")
trace(this.getClass, repo)
Option(repo)
.toSeq.filter(_.trim.nonEmpty)
.map(url =>
new FedoraProvider(new FedoraClient(new FedoraCredentials(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ case class Resolver() extends DebugEnhancedLogging{
}

private def resolve(url: String) = {
trace(url)
Try(Http(url).asString).flatMap {
case response if response.code == 404 =>
logger.error(s"not found: $url")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ import scala.xml.transform.{ RewriteRule, RuleTransformer }
import scala.xml.{ Elem, Node, NodeSeq }

class DdmTransformer(cfgDir: File, collectionsMap: Map[String, Elem] = Map.empty) extends DebugEnhancedLogging {
trace()
trace(())
val reportRewriteRule: ReportRewriteRule = ReportRewriteRule(cfgDir)
private val acquisitionRewriteRule = AcquisitionRewriteRule(cfgDir)
private val languageRewriteRule = LanguageRewriteRule(cfgDir / "languages.csv")
Expand Down Expand Up @@ -71,6 +71,7 @@ class DdmTransformer(cfgDir: File, collectionsMap: Map[String, Elem] = Map.empty
}

def transform(ddmIn: Node, datasetId: String): Try[Node] = {
trace(datasetId)
val newDcmiNodes = collectionsMap.get(datasetId)
.toSeq ++ unknownRightsHolder(ddmIn)

Expand Down
Loading

0 comments on commit 23f0eb9

Please sign in to comment.