From 1b27dbaeecdd78cf51b038a8714d6f0afd86b934 Mon Sep 17 00:00:00 2001 From: Jo Pol Date: Tue, 23 Nov 2021 11:54:53 +0100 Subject: [PATCH] DD-682: use Bag-Sequence-Number for migration info (#60) --- .../BagFacade.scala | 14 +++-- .../BagInfo.scala | 10 +++- .../EasyConvertBagToDepositApp.scala | 10 ++-- .../PreStaged.scala | 29 +++++----- .../AppSpec.scala | 57 ++++++++++++++++++- .../BagInfoSpec.scala | 6 +- .../PreStagedSpec.scala | 28 ++------- 7 files changed, 104 insertions(+), 50 deletions(-) diff --git a/src/main/scala/nl.knaw.dans.easy.bag2deposit/BagFacade.scala b/src/main/scala/nl.knaw.dans.easy.bag2deposit/BagFacade.scala index 93590be6..9521f58f 100644 --- a/src/main/scala/nl.knaw.dans.easy.bag2deposit/BagFacade.scala +++ b/src/main/scala/nl.knaw.dans.easy.bag2deposit/BagFacade.scala @@ -37,14 +37,20 @@ object BagFacade { val IS_VERSION_OF_KEY = "Is-Version-Of" val EASY_USER_ACCOUNT_KEY = "EASY-User-Account" + // duplicate of https://github.com/DANS-KNAW/easy-fedora-to-bag/blob/1cf9c03209/src/main/scala/nl/knaw/dans/easy/fedoratobag/BagVersion.scala#L30 + val BAG_SEQUENCE_NUMBER = "Bag-Sequence-Number" + // TODO variant of https://github.com/DANS-KNAW/easy-ingest-flow/blob/78ea3bec23923adf10c1c0650b019ea51c251ce6/src/main/scala/nl.knaw.dans.easy.ingestflow/BagitFacadeComponent.scala#L133 private val bagReader = new BagReader() - def getBag(bagDir: File): Try[Bag] = Try { - bagReader.read(bagDir.path) - }.recoverWith { - case cause: Exception => Failure(InvalidBagException(s"$bagDir, $cause")) + def getBag(bagDir: File): Try[Bag] = { + val triedBag = Try { + bagReader.read(bagDir.path) + } + triedBag.recoverWith { + case cause: Exception => Failure(InvalidBagException(s"$bagDir, $cause")) + } } def updateMetadata(bag: Bag): Try[Unit] = { diff --git a/src/main/scala/nl.knaw.dans.easy.bag2deposit/BagInfo.scala b/src/main/scala/nl.knaw.dans.easy.bag2deposit/BagInfo.scala index e3a6d046..8e13ad2c 100644 --- a/src/main/scala/nl.knaw.dans.easy.bag2deposit/BagInfo.scala +++ b/src/main/scala/nl.knaw.dans.easy.bag2deposit/BagInfo.scala @@ -24,7 +24,14 @@ import java.util.UUID import scala.collection.JavaConverters._ import scala.util.{ Failure, Try } -case class BagInfo(userId: String, created: String, uuid: UUID, bagName: String, versionOf: Option[UUID], basePids: Option[BasePids] = None) +case class BagInfo(userId: String, + created: String, + uuid: UUID, + bagName: String, + versionOf: Option[UUID], + bagSeqNr: Int = 1, + basePids: Option[BasePids] = None, + ) object BagInfo { // these values should match easy-fedora-to-bag @@ -53,6 +60,7 @@ object BagInfo { bagName = bagDir.name, versionOf = maybeVersionOf, basePids = basePids, + bagSeqNr = Option(bagInfo.get(BagFacade.BAG_SEQUENCE_NUMBER)).flatMap(_.asScala.headOption).getOrElse("1").toInt, ) }.recoverWith { case e: ConfigurationException => Failure(InvalidBagException(e.getMessage)) diff --git a/src/main/scala/nl.knaw.dans.easy.bag2deposit/EasyConvertBagToDepositApp.scala b/src/main/scala/nl.knaw.dans.easy.bag2deposit/EasyConvertBagToDepositApp.scala index 10f33dc8..e82bd318 100644 --- a/src/main/scala/nl.knaw.dans.easy.bag2deposit/EasyConvertBagToDepositApp.scala +++ b/src/main/scala/nl.knaw.dans.easy.bag2deposit/EasyConvertBagToDepositApp.scala @@ -28,7 +28,7 @@ import nl.knaw.dans.lib.logging.DebugEnhancedLogging import java.io.{ FileNotFoundException, IOException } import java.nio.charset.Charset import java.nio.file.Paths -import scala.collection.JavaConverters.mapAsScalaMapConverter +import scala.collection.JavaConverters._ import scala.collection.mutable.ListBuffer import scala.util.{ Failure, Success, Try } import scala.xml._ @@ -85,6 +85,7 @@ class EasyConvertBagToDepositApp(configuration: Configuration) extends DebugEnha logger.debug(s"creating application.properties for $bagParentDir") val bagInfoKeysToRemove = Seq( BagFacade.EASY_USER_ACCOUNT_KEY, + BagFacade.BAG_SEQUENCE_NUMBER, BagInfo.baseUrnKey, BagInfo.baseDoiKey, ) @@ -113,7 +114,8 @@ class EasyConvertBagToDepositApp(configuration: Configuration) extends DebugEnha "http://easy.dans.knaw.nl/schemas/md/ddm/" -> compare(oldDcmi, newDcmi), )) _ = trace(bagInfo) - preStaged <- getPreStaged(bag, bagDir, depositProps.getString("identifier.doi")) + doi = depositProps.getString("identifier.doi") + preStaged <- getPreStaged(bag, bagDir, doi, bagInfo.bagSeqNr) _ = bagInfoKeysToRemove.foreach(mutableBagMetadata.remove) _ = depositProps.setProperty("depositor.userId", (amdOut \ "depositorId").text) // so far collecting changes @@ -148,13 +150,13 @@ class EasyConvertBagToDepositApp(configuration: Configuration) extends DebugEnha Failure(e) } - private def getPreStaged(bag: Bag, bagDir: File, doi: String): Try[Seq[PreStaged]] = { + private def getPreStaged(bag: Bag, bagDir: File, doi: String, version: Int = 1): Try[Seq[PreStaged]] = { configuration.maybePreStagedProvider.map { provider => val shaToPath = sha1Manifest(bag.getPayLoadManifests).asScala .map { case (path, sha) => sha -> bagDir.relativize(File(path)) }.toMap trace(doi) for { - migratedFiles <- provider.get(doi) // paths from migration info + migratedFiles <- provider.get(doi, version) // paths from migration info migratedPayloadFiles = migratedFiles.filterNot(_.path.toString.startsWith("easy-migration/")) // exclude metadata migrated as data for provenance existingMigratedFiles = migratedPayloadFiles.filter(p => shaToPath.keySet.contains(p.checksumValue)) _ = debug(s"ignored for pre-staged.csv: ${ migratedFiles.diff(migratedPayloadFiles) }") diff --git a/src/main/scala/nl.knaw.dans.easy.bag2deposit/PreStaged.scala b/src/main/scala/nl.knaw.dans.easy.bag2deposit/PreStaged.scala index 7e9ce13e..2e0e9ba0 100644 --- a/src/main/scala/nl.knaw.dans.easy.bag2deposit/PreStaged.scala +++ b/src/main/scala/nl.knaw.dans.easy.bag2deposit/PreStaged.scala @@ -24,18 +24,14 @@ import scalaj.http.{ Http, HttpResponse } import java.io.IOException import java.net.URI import java.nio.file.{ Path, Paths } -import scala.util.{ Failure, Success, Try } +import scala.util.{ Failure, Try } /** - * @param path as in the manifest file - * @param storageId as returned from the migration-info-service + * @param path as in the manifest file, assembled from migration-info-service + * @param checksumValue as returned from the migration-info-service + * @param storageId as returned from the migration-info-service */ -case class PreStaged(path: Path, - mimeType: String, - checksumType: String, - checksumValue: String, - storageId: String, - ) +case class PreStaged(path: Path, checksumValue: String, storageId: String) object PreStaged { private val csvFormat = RFC4180 .withHeader("path", "checksum", "storageId") @@ -53,9 +49,18 @@ object PreStaged { ) } + /** copy of nl.knaw.dans.lib.dataverse/model/file/prestaged/Checksum.scala */ private case class CheckSum(`@type`: String, `@value`: String) - private case class PrestagedFile(storageIdentifier: String, fileName: String, mimeType: String, checksum: CheckSum) - private case class MigrationInfo(label: String, directoryLabel: Option[String], versionSequenceNumber: String, prestagedFile: PrestagedFile) + + /** subset of nl.knaw.dans.lib.dataverse/model/file/prestaged/PrestagedFile.scala */ + private case class PrestagedFile(storageIdentifier: String, fileName: String, checksum: CheckSum) + + /** copy of nl.knaw.dans.dd.migrationinfo/BasicFileMeta.scala + * and nl.knaw.dans.easy.dd2d/migrationinfo/BasicFileMeta.scala + * returned from the migration-info-service + */ + private case class MigrationInfo(label: String, directoryLabel: Option[String], versionSequenceNumber: Int, prestagedFile: PrestagedFile) + private implicit val jsonFormats: Formats = new DefaultFormats {} def apply(json: String): Try[List[PreStaged]] = Try { @@ -64,8 +69,6 @@ object PreStaged { val preStagedFiles = migrationInfoes .map(mi => new PreStaged( Paths.get(s"${ mi.directoryLabel.map(l => s"$l/").getOrElse("") }${ mi.prestagedFile.fileName }"), - mi.prestagedFile.mimeType, - mi.prestagedFile.checksum.`@type`, mi.prestagedFile.checksum.`@value`, mi.prestagedFile.storageIdentifier, )) diff --git a/src/test/scala/nl.knaw.dans.easy.bag2deposit/AppSpec.scala b/src/test/scala/nl.knaw.dans.easy.bag2deposit/AppSpec.scala index da44da05..c552a03f 100644 --- a/src/test/scala/nl.knaw.dans.easy.bag2deposit/AppSpec.scala +++ b/src/test/scala/nl.knaw.dans.easy.bag2deposit/AppSpec.scala @@ -175,7 +175,7 @@ class AppSpec extends AnyFlatSpec with XmlSupport with Matchers with AppConfigSu | { | "label": "stringABC", | "directoryLabel": "data", - | "versionSequenceNumber": "1", + | "versionSequenceNumber": 1, | "prestagedFile": { | "storageIdentifier": "123", | "fileName": "foo.txt", @@ -188,7 +188,8 @@ class AppSpec extends AnyFlatSpec with XmlSupport with Matchers with AppConfigSu | } |]""".stripMargin val delegatePreStagedProvider = mock[PreStagedProvider] - (delegatePreStagedProvider.execute(_: String)) expects * returning + (delegatePreStagedProvider.execute(_: String) + ) expects "/datasets/:persistentId/seq/1/basic-file-metas?persistentId=doi:10.5072/dans-2xg-umq8" returning HttpResponse(sampleJson, 200, Map.empty) delegatingPreStagedProvider(delegatePreStagedProvider) } @@ -213,6 +214,58 @@ class AppSpec extends AnyFlatSpec with XmlSupport with Matchers with AppConfigSu (movedBag / "manifest-sha1.txt").contentAsString shouldNot include("foo.txt") } + it should "search migration-info with seq nr 2" in { + + val preStagedProvider = { + val sampleJson: String = + """[ + | { + | "label": "stringABC", + | "directoryLabel": "data", + | "versionSequenceNumber": 2, + | "prestagedFile": { + | "storageIdentifier": "123", + | "fileName": "foo.txt", + | "mimeType": "text/plain", + | "checksum": { + | "@type": "sha1", + | "@value": "62cdb7020ff920e5aa642c3d4066950dd1f01f4d" + | }, + | } + | } + |]""".stripMargin + val delegatePreStagedProvider = mock[PreStagedProvider] + (delegatePreStagedProvider.execute(_: String) + ) expects "/datasets/:persistentId/seq/2/basic-file-metas?persistentId=doi:10.5072/dans-2xg-umq8" returning + HttpResponse(sampleJson, 200, Map.empty) + delegatingPreStagedProvider(delegatePreStagedProvider) + } + val bagIndex = { + val delegateBagIndex = mock[MockBagIndex] + (delegateBagIndex.execute(_: String)) expects s"bag-sequence?contains=$validUUID" returning + new HttpResponse[String]("123", 200, Map.empty) + delegatingBagIndex(delegateBagIndex) + } + val appConfig = testConfig(bagIndex, null, preStagedProvider) + + val depositDir = testDir / "exports" / validUUID + (resourceBags / validUUID).copyTo(depositDir) + val bagInfoFile = depositDir / "bag-revision-1" / "bag-info.txt" + + // note that the number 2 reappears in sampleJson, the mocked result of the migration-info service + bagInfoFile.write(bagInfoFile.contentAsString + "Bag-Sequence-Number: 2\n") + + new EasyConvertBagToDepositApp(appConfig).addPropsToBags( + (testDir / "exports").children, + maybeOutputDir = Some((testDir / "ingest-dir").createDirectories()), + DepositPropertiesFactory(appConfig, DOI, VAULT) + ) shouldBe Success("No fatal errors") + + val movedBag = testDir / "ingest-dir" / validUUID / "bag-revision-1" + (movedBag / "data").list.toSeq.map(_.name) shouldBe Seq("easy-migration") // and no real payload + (movedBag / "manifest-sha1.txt").contentAsString shouldNot include("foo.txt") + } + it should "not add the emd to data/easy-migration in VAULT datasets" in { val delegate = mock[MockBagIndex] (delegate.execute(_: String)) expects s"bag-sequence?contains=$validUUID" returning diff --git a/src/test/scala/nl.knaw.dans.easy.bag2deposit/BagInfoSpec.scala b/src/test/scala/nl.knaw.dans.easy.bag2deposit/BagInfoSpec.scala index 3a95ffaa..f50da687 100644 --- a/src/test/scala/nl.knaw.dans.easy.bag2deposit/BagInfoSpec.scala +++ b/src/test/scala/nl.knaw.dans.easy.bag2deposit/BagInfoSpec.scala @@ -73,7 +73,7 @@ class BagInfoSpec extends AnyFlatSpec with Matchers with AppConfigSupport with B s"""Bagging-Date: $dateTime |EASY-User-Account: user001 |""".stripMargin) - BagInfo(bagDir, mockBag(bagDir).getMetadata) shouldBe Success(new BagInfo("user001", dateTime, uuid, "bag-name", None, None)) + BagInfo(bagDir, mockBag(bagDir).getMetadata) shouldBe Success(new BagInfo("user001", dateTime, uuid, "bag-name", None, 1, None)) } it should "have a version-of" in { val bagUuid = UUID.randomUUID() @@ -85,7 +85,7 @@ class BagInfoSpec extends AnyFlatSpec with Matchers with AppConfigSupport with B |Is-Version-Of: $versionOfUuid |EASY-User-Account: user001 |""".stripMargin) - BagInfo(bagDir, mockBag(bagDir).getMetadata) shouldBe Success(new BagInfo("user001", dateTime, bagUuid, "bag-name", Some(versionOfUuid), None)) + BagInfo(bagDir, mockBag(bagDir).getMetadata) shouldBe Success(new BagInfo("user001", dateTime, bagUuid, "bag-name", Some(versionOfUuid), 1, None)) } it should "have a base-urn" in { val bagUuid = UUID.randomUUID() @@ -99,6 +99,6 @@ class BagInfoSpec extends AnyFlatSpec with Matchers with AppConfigSupport with B |${ BagInfo.baseDoiKey }: lalala |EASY-User-Account: user001 |""".stripMargin) - BagInfo(bagDir, mockBag(bagDir).getMetadata) shouldBe Success(new BagInfo("user001", dateTime, bagUuid, "bag-name", Some(versionOfUuid), Some(BasePids("rabarbera", "lalala")))) + BagInfo(bagDir, mockBag(bagDir).getMetadata) shouldBe Success(new BagInfo("user001", dateTime, bagUuid, "bag-name", Some(versionOfUuid), 1, Some(BasePids("rabarbera", "lalala")))) } } diff --git a/src/test/scala/nl.knaw.dans.easy.bag2deposit/PreStagedSpec.scala b/src/test/scala/nl.knaw.dans.easy.bag2deposit/PreStagedSpec.scala index 46b9e140..a0d743bd 100644 --- a/src/test/scala/nl.knaw.dans.easy.bag2deposit/PreStagedSpec.scala +++ b/src/test/scala/nl.knaw.dans.easy.bag2deposit/PreStagedSpec.scala @@ -73,33 +73,15 @@ class PreStagedSpec extends AnyFlatSpec with Matchers with FileSystemSupport wit val mockedProvider = mock[PreStagedProvider] (mockedProvider.execute(_: String)) expects "/datasets/:persistentId/seq/1/basic-file-metas?persistentId=doi:some-doi" returning HttpResponse(sampleJson, 200, Map.empty) - val preStaged = Seq( - new PreStaged( - path = Paths.get("gf/diagrams/brick-to-overlap-choices.png"), - mimeType = "image/png", - checksumType = "SHA-1", - checksumValue = "393432fcc68b5cd8b4b3bf15c5244c2631577694", - storageId = "file://17b2f042ee9-ed12da9cbf50", - ), - new PreStaged( - path = Paths.get("3-of-4.jpg"), - mimeType = "image/jpeg", - checksumType = "SHA-1", - checksumValue = "1776070b9338352a5be96847187c96b30987dfd5", - storageId = "file://17b2f03c4c8-c08fbd7eee88", - ), + val expectedPreStaged = Seq( + new PreStaged(path = Paths.get("gf/diagrams/brick-to-overlap-choices.png"), checksumValue = "393432fcc68b5cd8b4b3bf15c5244c2631577694", storageId = "file://17b2f042ee9-ed12da9cbf50"), + new PreStaged(path = Paths.get("3-of-4.jpg"), checksumValue = "1776070b9338352a5be96847187c96b30987dfd5", storageId = "file://17b2f03c4c8-c08fbd7eee88"), ) - delegatingPreStagedProvider(mockedProvider).get("some-doi") shouldBe Success(preStaged) + delegatingPreStagedProvider(mockedProvider).get("some-doi") shouldBe Success(expectedPreStaged) } "write" should "create csv" in { - val preStaged = new PreStaged( - path = Paths.get("some/path/to/something.txt"), - mimeType = "text/plain", - checksumType = "sha1", - checksumValue = "blabla", - storageId = "123", - ) + val preStaged = new PreStaged(path = Paths.get("some/path/to/something.txt"), checksumValue = "blabla", storageId = "123") PreStaged.write(Seq(preStaged), testDir) shouldBe Success(()) (testDir / "pre-staged.csv").contentAsString shouldBe expectedCsv }