Skip to content

Commit

Permalink
DD-682: use Bag-Sequence-Number for migration info (#60)
Browse files Browse the repository at this point in the history
  • Loading branch information
jo-pol authored Nov 23, 2021
1 parent 65609bc commit 1b27dba
Show file tree
Hide file tree
Showing 7 changed files with 104 additions and 50 deletions.
14 changes: 10 additions & 4 deletions src/main/scala/nl.knaw.dans.easy.bag2deposit/BagFacade.scala
Original file line number Diff line number Diff line change
Expand Up @@ -37,14 +37,20 @@ object BagFacade {
val IS_VERSION_OF_KEY = "Is-Version-Of"
val EASY_USER_ACCOUNT_KEY = "EASY-User-Account"

// duplicate of https://github.com/DANS-KNAW/easy-fedora-to-bag/blob/1cf9c03209/src/main/scala/nl/knaw/dans/easy/fedoratobag/BagVersion.scala#L30
val BAG_SEQUENCE_NUMBER = "Bag-Sequence-Number"

// TODO variant of https://github.com/DANS-KNAW/easy-ingest-flow/blob/78ea3bec23923adf10c1c0650b019ea51c251ce6/src/main/scala/nl.knaw.dans.easy.ingestflow/BagitFacadeComponent.scala#L133

private val bagReader = new BagReader()

def getBag(bagDir: File): Try[Bag] = Try {
bagReader.read(bagDir.path)
}.recoverWith {
case cause: Exception => Failure(InvalidBagException(s"$bagDir, $cause"))
/**
 * Reads the bag stored in the given directory.
 *
 * @param bagDir the directory to read the bag from
 * @return the bag as returned by `bagReader`; an exception thrown while reading
 *         is replaced by a `Failure` holding an `InvalidBagException` whose
 *         message is the directory followed by the original exception
 */
def getBag(bagDir: File): Try[Bag] = {
  Try(bagReader.read(bagDir.path))
    .recoverWith { case cause: Exception =>
      Failure(InvalidBagException(s"$bagDir, $cause"))
    }
}

def updateMetadata(bag: Bag): Try[Unit] = {
Expand Down
10 changes: 9 additions & 1 deletion src/main/scala/nl.knaw.dans.easy.bag2deposit/BagInfo.scala
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,14 @@ import java.util.UUID
import scala.collection.JavaConverters._
import scala.util.{ Failure, Try }

case class BagInfo(userId: String, created: String, uuid: UUID, bagName: String, versionOf: Option[UUID], basePids: Option[BasePids] = None)
/**
 * Values collected for a single bag; the companion object fills them from the bag directory and its bag-info metadata.
 *
 * @param userId    presumably the EASY-User-Account value of bag-info.txt -- confirm in the companion object
 * @param created   presumably the Bagging-Date value of bag-info.txt -- confirm in the companion object
 * @param uuid      identifier of the bag; NOTE(review): its origin is not visible in this hunk
 * @param bagName   name of the bag directory
 * @param versionOf the Is-Version-Of UUID, None when the bag is not a version of another bag
 * @param bagSeqNr  first Bag-Sequence-Number value of bag-info.txt, 1 when that key is absent;
 *                  used as the sequence number when querying the migration-info service
 * @param basePids  base URN and DOI when bag-info.txt provides them, otherwise None
 */
case class BagInfo(userId: String,
created: String,
uuid: UUID,
bagName: String,
versionOf: Option[UUID],
bagSeqNr: Int = 1,
basePids: Option[BasePids] = None,
)

object BagInfo {
// these values should match easy-fedora-to-bag
Expand Down Expand Up @@ -53,6 +60,7 @@ object BagInfo {
bagName = bagDir.name,
versionOf = maybeVersionOf,
basePids = basePids,
bagSeqNr = Option(bagInfo.get(BagFacade.BAG_SEQUENCE_NUMBER)).flatMap(_.asScala.headOption).getOrElse("1").toInt,
)
}.recoverWith { case e: ConfigurationException =>
Failure(InvalidBagException(e.getMessage))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ import nl.knaw.dans.lib.logging.DebugEnhancedLogging
import java.io.{ FileNotFoundException, IOException }
import java.nio.charset.Charset
import java.nio.file.Paths
import scala.collection.JavaConverters.mapAsScalaMapConverter
import scala.collection.JavaConverters._
import scala.collection.mutable.ListBuffer
import scala.util.{ Failure, Success, Try }
import scala.xml._
Expand Down Expand Up @@ -85,6 +85,7 @@ class EasyConvertBagToDepositApp(configuration: Configuration) extends DebugEnha
logger.debug(s"creating application.properties for $bagParentDir")
val bagInfoKeysToRemove = Seq(
BagFacade.EASY_USER_ACCOUNT_KEY,
BagFacade.BAG_SEQUENCE_NUMBER,
BagInfo.baseUrnKey,
BagInfo.baseDoiKey,
)
Expand Down Expand Up @@ -113,7 +114,8 @@ class EasyConvertBagToDepositApp(configuration: Configuration) extends DebugEnha
"http://easy.dans.knaw.nl/schemas/md/ddm/" -> compare(oldDcmi, newDcmi),
))
_ = trace(bagInfo)
preStaged <- getPreStaged(bag, bagDir, depositProps.getString("identifier.doi"))
doi = depositProps.getString("identifier.doi")
preStaged <- getPreStaged(bag, bagDir, doi, bagInfo.bagSeqNr)
_ = bagInfoKeysToRemove.foreach(mutableBagMetadata.remove)
_ = depositProps.setProperty("depositor.userId", (amdOut \ "depositorId").text)
// so far collecting changes
Expand Down Expand Up @@ -148,13 +150,13 @@ class EasyConvertBagToDepositApp(configuration: Configuration) extends DebugEnha
Failure(e)
}

private def getPreStaged(bag: Bag, bagDir: File, doi: String): Try[Seq[PreStaged]] = {
private def getPreStaged(bag: Bag, bagDir: File, doi: String, version: Int = 1): Try[Seq[PreStaged]] = {
configuration.maybePreStagedProvider.map { provider =>
val shaToPath = sha1Manifest(bag.getPayLoadManifests).asScala
.map { case (path, sha) => sha -> bagDir.relativize(File(path)) }.toMap
trace(doi)
for {
migratedFiles <- provider.get(doi) // paths from migration info
migratedFiles <- provider.get(doi, version) // paths from migration info
migratedPayloadFiles = migratedFiles.filterNot(_.path.toString.startsWith("easy-migration/")) // exclude metadata migrated as data for provenance
existingMigratedFiles = migratedPayloadFiles.filter(p => shaToPath.keySet.contains(p.checksumValue))
_ = debug(s"ignored for pre-staged.csv: ${ migratedFiles.diff(migratedPayloadFiles) }")
Expand Down
29 changes: 16 additions & 13 deletions src/main/scala/nl.knaw.dans.easy.bag2deposit/PreStaged.scala
Original file line number Diff line number Diff line change
Expand Up @@ -24,18 +24,14 @@ import scalaj.http.{ Http, HttpResponse }
import java.io.IOException
import java.net.URI
import java.nio.file.{ Path, Paths }
import scala.util.{ Failure, Success, Try }
import scala.util.{ Failure, Try }

/**
* @param path as in the manifest file
* @param storageId as returned from the migration-info-service
* @param path as in the manifest file, assembled from migration-info-service
* @param checksumValue as returned from the migration-info-service
* @param storageId as returned from the migration-info-service
*/
case class PreStaged(path: Path,
mimeType: String,
checksumType: String,
checksumValue: String,
storageId: String,
)
case class PreStaged(path: Path, checksumValue: String, storageId: String)
object PreStaged {
private val csvFormat = RFC4180
.withHeader("path", "checksum", "storageId")
Expand All @@ -53,9 +49,18 @@ object PreStaged {
)
}

/** copy of nl.knaw.dans.lib.dataverse/model/file/prestaged/Checksum.scala */
private case class CheckSum(`@type`: String, `@value`: String)
private case class PrestagedFile(storageIdentifier: String, fileName: String, mimeType: String, checksum: CheckSum)
private case class MigrationInfo(label: String, directoryLabel: Option[String], versionSequenceNumber: String, prestagedFile: PrestagedFile)

/** subset of nl.knaw.dans.lib.dataverse/model/file/prestaged/PrestagedFile.scala */
private case class PrestagedFile(storageIdentifier: String, fileName: String, checksum: CheckSum)

/** copy of nl.knaw.dans.dd.migrationinfo/BasicFileMeta.scala
* and nl.knaw.dans.easy.dd2d/migrationinfo/BasicFileMeta.scala
* returned from the migration-info-service
*/
private case class MigrationInfo(label: String, directoryLabel: Option[String], versionSequenceNumber: Int, prestagedFile: PrestagedFile)

private implicit val jsonFormats: Formats = new DefaultFormats {}

def apply(json: String): Try[List[PreStaged]] = Try {
Expand All @@ -64,8 +69,6 @@ object PreStaged {
val preStagedFiles = migrationInfoes
.map(mi => new PreStaged(
Paths.get(s"${ mi.directoryLabel.map(l => s"$l/").getOrElse("") }${ mi.prestagedFile.fileName }"),
mi.prestagedFile.mimeType,
mi.prestagedFile.checksum.`@type`,
mi.prestagedFile.checksum.`@value`,
mi.prestagedFile.storageIdentifier,
))
Expand Down
57 changes: 55 additions & 2 deletions src/test/scala/nl.knaw.dans.easy.bag2deposit/AppSpec.scala
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,7 @@ class AppSpec extends AnyFlatSpec with XmlSupport with Matchers with AppConfigSu
| {
| "label": "stringABC",
| "directoryLabel": "data",
| "versionSequenceNumber": "1",
| "versionSequenceNumber": 1,
| "prestagedFile": {
| "storageIdentifier": "123",
| "fileName": "foo.txt",
Expand All @@ -188,7 +188,8 @@ class AppSpec extends AnyFlatSpec with XmlSupport with Matchers with AppConfigSu
| }
|]""".stripMargin
val delegatePreStagedProvider = mock[PreStagedProvider]
(delegatePreStagedProvider.execute(_: String)) expects * returning
(delegatePreStagedProvider.execute(_: String)
) expects "/datasets/:persistentId/seq/1/basic-file-metas?persistentId=doi:10.5072/dans-2xg-umq8" returning
HttpResponse(sampleJson, 200, Map.empty)
delegatingPreStagedProvider(delegatePreStagedProvider)
}
Expand All @@ -213,6 +214,58 @@ class AppSpec extends AnyFlatSpec with XmlSupport with Matchers with AppConfigSu
(movedBag / "manifest-sha1.txt").contentAsString shouldNot include("foo.txt")
}

// Scenario: bag-info.txt carries "Bag-Sequence-Number: 2", so the request to the
// migration-info service must ask for sequence number 2 instead of the default 1.
it should "search migration-info with seq nr 2" in {

// Mocked migration-info service: it only accepts a request for ".../seq/2/..." of the DOI
// and answers with sampleJson.
val preStagedProvider = {
// NOTE(review): the comma after the "checksum" object is directly followed by a closing brace,
// which is not strict JSON -- confirm the parser in use tolerates this.
val sampleJson: String =
"""[
| {
| "label": "stringABC",
| "directoryLabel": "data",
| "versionSequenceNumber": 2,
| "prestagedFile": {
| "storageIdentifier": "123",
| "fileName": "foo.txt",
| "mimeType": "text/plain",
| "checksum": {
| "@type": "sha1",
| "@value": "62cdb7020ff920e5aa642c3d4066950dd1f01f4d"
| },
| }
| }
|]""".stripMargin
val delegatePreStagedProvider = mock[PreStagedProvider]
// the expected URI contains "seq/2": any other sequence number fails the mock expectation
(delegatePreStagedProvider.execute(_: String)
) expects "/datasets/:persistentId/seq/2/basic-file-metas?persistentId=doi:10.5072/dans-2xg-umq8" returning
HttpResponse(sampleJson, 200, Map.empty)
delegatingPreStagedProvider(delegatePreStagedProvider)
}
// Mocked bag-index: answers the bag-sequence lookup for validUUID with status 200 and body "123".
val bagIndex = {
val delegateBagIndex = mock[MockBagIndex]
(delegateBagIndex.execute(_: String)) expects s"bag-sequence?contains=$validUUID" returning
new HttpResponse[String]("123", 200, Map.empty)
delegatingBagIndex(delegateBagIndex)
}
val appConfig = testConfig(bagIndex, null, preStagedProvider)

// Fixture: copy the valid test bag into the exports directory that serves as input.
val depositDir = testDir / "exports" / validUUID
(resourceBags / validUUID).copyTo(depositDir)
val bagInfoFile = depositDir / "bag-revision-1" / "bag-info.txt"

// note that the number 2 reappears in sampleJson, the mocked result of the migration-info service
// (appending assumes the copied bag-info.txt ends with a newline -- TODO confirm)
bagInfoFile.write(bagInfoFile.contentAsString + "Bag-Sequence-Number: 2\n")

new EasyConvertBagToDepositApp(appConfig).addPropsToBags(
(testDir / "exports").children,
maybeOutputDir = Some((testDir / "ingest-dir").createDirectories()),
DepositPropertiesFactory(appConfig, DOI, VAULT)
) shouldBe Success("No fatal errors")

// The converted bag must have lost foo.txt from payload and manifest, presumably because
// the mocked migration info reports that file as pre-staged.
val movedBag = testDir / "ingest-dir" / validUUID / "bag-revision-1"
(movedBag / "data").list.toSeq.map(_.name) shouldBe Seq("easy-migration") // and no real payload
(movedBag / "manifest-sha1.txt").contentAsString shouldNot include("foo.txt")
}

it should "not add the emd to data/easy-migration in VAULT datasets" in {
val delegate = mock[MockBagIndex]
(delegate.execute(_: String)) expects s"bag-sequence?contains=$validUUID" returning
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ class BagInfoSpec extends AnyFlatSpec with Matchers with AppConfigSupport with B
s"""Bagging-Date: $dateTime
|EASY-User-Account: user001
|""".stripMargin)
BagInfo(bagDir, mockBag(bagDir).getMetadata) shouldBe Success(new BagInfo("user001", dateTime, uuid, "bag-name", None, None))
BagInfo(bagDir, mockBag(bagDir).getMetadata) shouldBe Success(new BagInfo("user001", dateTime, uuid, "bag-name", None, 1, None))
}
it should "have a version-of" in {
val bagUuid = UUID.randomUUID()
Expand All @@ -85,7 +85,7 @@ class BagInfoSpec extends AnyFlatSpec with Matchers with AppConfigSupport with B
|Is-Version-Of: $versionOfUuid
|EASY-User-Account: user001
|""".stripMargin)
BagInfo(bagDir, mockBag(bagDir).getMetadata) shouldBe Success(new BagInfo("user001", dateTime, bagUuid, "bag-name", Some(versionOfUuid), None))
BagInfo(bagDir, mockBag(bagDir).getMetadata) shouldBe Success(new BagInfo("user001", dateTime, bagUuid, "bag-name", Some(versionOfUuid), 1, None))
}
it should "have a base-urn" in {
val bagUuid = UUID.randomUUID()
Expand All @@ -99,6 +99,6 @@ class BagInfoSpec extends AnyFlatSpec with Matchers with AppConfigSupport with B
|${ BagInfo.baseDoiKey }: lalala
|EASY-User-Account: user001
|""".stripMargin)
BagInfo(bagDir, mockBag(bagDir).getMetadata) shouldBe Success(new BagInfo("user001", dateTime, bagUuid, "bag-name", Some(versionOfUuid), Some(BasePids("rabarbera", "lalala"))))
BagInfo(bagDir, mockBag(bagDir).getMetadata) shouldBe Success(new BagInfo("user001", dateTime, bagUuid, "bag-name", Some(versionOfUuid), 1, Some(BasePids("rabarbera", "lalala"))))
}
}
28 changes: 5 additions & 23 deletions src/test/scala/nl.knaw.dans.easy.bag2deposit/PreStagedSpec.scala
Original file line number Diff line number Diff line change
Expand Up @@ -73,33 +73,15 @@ class PreStagedSpec extends AnyFlatSpec with Matchers with FileSystemSupport wit
val mockedProvider = mock[PreStagedProvider]
(mockedProvider.execute(_: String)) expects "/datasets/:persistentId/seq/1/basic-file-metas?persistentId=doi:some-doi" returning
HttpResponse(sampleJson, 200, Map.empty)
val preStaged = Seq(
new PreStaged(
path = Paths.get("gf/diagrams/brick-to-overlap-choices.png"),
mimeType = "image/png",
checksumType = "SHA-1",
checksumValue = "393432fcc68b5cd8b4b3bf15c5244c2631577694",
storageId = "file://17b2f042ee9-ed12da9cbf50",
),
new PreStaged(
path = Paths.get("3-of-4.jpg"),
mimeType = "image/jpeg",
checksumType = "SHA-1",
checksumValue = "1776070b9338352a5be96847187c96b30987dfd5",
storageId = "file://17b2f03c4c8-c08fbd7eee88",
),
val expectedPreStaged = Seq(
new PreStaged(path = Paths.get("gf/diagrams/brick-to-overlap-choices.png"), checksumValue = "393432fcc68b5cd8b4b3bf15c5244c2631577694", storageId = "file://17b2f042ee9-ed12da9cbf50"),
new PreStaged(path = Paths.get("3-of-4.jpg"), checksumValue = "1776070b9338352a5be96847187c96b30987dfd5", storageId = "file://17b2f03c4c8-c08fbd7eee88"),
)
delegatingPreStagedProvider(mockedProvider).get("some-doi") shouldBe Success(preStaged)
delegatingPreStagedProvider(mockedProvider).get("some-doi") shouldBe Success(expectedPreStaged)
}

"write" should "create csv" in {
val preStaged = new PreStaged(
path = Paths.get("some/path/to/something.txt"),
mimeType = "text/plain",
checksumType = "sha1",
checksumValue = "blabla",
storageId = "123",
)
val preStaged = new PreStaged(path = Paths.get("some/path/to/something.txt"), checksumValue = "blabla", storageId = "123")
PreStaged.write(Seq(preStaged), testDir) shouldBe Success(())
(testDir / "pre-staged.csv").contentAsString shouldBe expectedCsv
}
Expand Down

0 comments on commit 1b27dba

Please sign in to comment.