/bags/filesizes/{uuid}/{path}:
  get:
    summary: Returns the size of a file, from a bag in any of the bag stores managed by this service.
    description: |
      A file is a regular file in a bag.

      * The file is specified by the path of the requested file within the bag.
      * The path may consist of more than one component.
      * A `404 Not Found` response is given if the item is not a regular file (for example a directory).

      ```
      example path:
        for file:      /path/to/file
        in bag:        40594b6d-8378-4260-b96b-13b57beadf7c/
        {uuid}/{path}: 40594b6d-8378-4260-b96b-13b57beadf7c/path/to/file
      ```

    parameters:
      - $ref: '#/components/parameters/Uuid'
      - $ref: '#/components/parameters/FilePath'
    responses:
      # Defined inline: the body of this response is the size of the file (a number),
      # not the content of the item.
      200:
        description: The size of the file in bytes, as a decimal number in the response body.
        content:
          text/plain:
            schema:
              type: integer
              format: int64
            example: 12
      400:
        description: Bad request.
      404:
        description: The bag or the file could not be found, or the item is not a regular file.
/stores/{store}/bags/filesizes/{uuid}/{path}:
  get:
    summary: Returns the size of a file in the specified bag store.
    description: |
      A file is a regular file in a bag.

      * The file is specified by the path of the requested file within the bag.
      * The path may consist of more than one component.
      * A `404 Not Found` response is given if the item is not a regular file (for example a directory).

      ```
      example path:
        for file:      /path/to/file
        in bag:        40594b6d-8378-4260-b96b-13b57beadf7c/
        {uuid}/{path}: 40594b6d-8378-4260-b96b-13b57beadf7c/path/to/file
      ```

    parameters:
      - $ref: '#/components/parameters/Store'
      - $ref: '#/components/parameters/Uuid'
      - $ref: '#/components/parameters/FilePath'
    responses:
      # Defined inline: the body of this response is the size of the file (a number),
      # not the content of the item.
      200:
        description: The size of the file in bytes, as a decimal number in the response body.
        content:
          text/plain:
            schema:
              type: integer
              format: int64
            example: 12
      400:
        description: Bad request.
      404:
        description: The store, the bag or the file could not be found, or the item is not a regular file.
+ components: diff --git a/src/main/scala/nl.knaw.dans.easy.bagstore/component/BagStoreComponent.scala b/src/main/scala/nl.knaw.dans.easy.bagstore/component/BagStoreComponent.scala index 3a17c40e..77f6cd4c 100755 --- a/src/main/scala/nl.knaw.dans.easy.bagstore/component/BagStoreComponent.scala +++ b/src/main/scala/nl.knaw.dans.easy.bagstore/component/BagStoreComponent.scala @@ -156,21 +156,44 @@ trait BagStoreComponent { trace(itemId) val bagId = BagId(itemId.uuid) - fileSystem.checkBagExists(bagId).flatMap { _ => - for { - bagDir <- fileSystem.toLocation(bagId) - itemPath <- itemId.toFileId.map(f => bagDir.resolve(f.path)).orElse(Success(bagDir)) - fileIds <- enumFiles(itemId, forceInactive = true) // validation that the bagdir is active will be done later one - fileSpecs <- createFileSpecs(bagDir, itemPath, fileIds) - dirSpecs <- createDirectorySpecs(bagDir, itemPath, fileIds) - allEntriesCount = fileSpecs.length + dirSpecs.length - allEntries = () => (dirSpecs ++ fileSpecs).sortBy(_.entryPath) // only concat and sort if necessary, hence as a function here - _ <- fileIsFound(allEntriesCount, itemId) - _ <- validateThatBagDirIsNotHidden(bagDir, itemId, forceInactive) // if the bag is hidden, also don't return a specific item from the bag - _ <- archiveStreamType.map(copyToArchiveStream(outputStream)(allEntries)) - .getOrElse(copyToOutputStream(itemId, fileIds, allEntriesCount, outputStream)) - } yield () - } + for { + _ <- fileSystem.checkBagExists(bagId) + bagDir <- fileSystem.toLocation(bagId) + itemPath <- itemId.toFileId.map(f => bagDir.resolve(f.path)).orElse(Success(bagDir)) + fileIds <- enumFiles(itemId, forceInactive = true) // validation that the bagdir is active will be done later one + fileSpecs <- createFileSpecs(bagDir, itemPath, fileIds) + dirSpecs <- createDirectorySpecs(bagDir, itemPath, fileIds) + allEntriesCount = fileSpecs.length + dirSpecs.length + allEntries = () => (dirSpecs ++ fileSpecs).sortBy(_.entryPath) // only concat and sort if 
/**
 * Looks up the size of a single file inside a bag of this bag store.
 *
 * The bag must exist (`fileSystem.checkBagExists`), the item id must be convertible to a
 * file id (`itemId.toFileId`), and the resulting path, resolved against the bag directory,
 * must exist and be a regular file.
 *
 * @param itemId the id of the file whose size is requested
 * @return `Success` with the value of `Files.size` for the file; a `Failure` with
 *         - whatever `checkBagExists`, `toLocation` or `toFileId` failed with,
 *         - `NoSuchItemException` if the resolved path does not exist,
 *         - `NoRegularFileException` if the resolved path is not a regular file,
 *         - any non-fatal exception thrown while reading the file attributes.
 */
def getSize(itemId: ItemId): Try[Long] = {
  trace(itemId)
  val bagId = BagId(itemId.uuid)

  for {
    _ <- fileSystem.checkBagExists(bagId)
    bagDir <- fileSystem.toLocation(bagId)
    fileId <- itemId.toFileId
    // NOTE(review): the resolved path is not checked to stay inside bagDir; presumably
    // ItemId/FileId parsing already rejects `..` segments - confirm.
    itemPath = bagDir.resolve(fileId.path)
    _ <- itemExists(itemId, itemPath)
    size <- getFileSize(itemId, itemPath)
  } yield size
}

/** Fails with `NoSuchItemException` when `itemPath` does not exist. */
private def itemExists(itemId: ItemId, itemPath: Path): Try[Unit] = Try {
  if (Files.notExists(itemPath))
    throw NoSuchItemException(itemId)
}

/**
 * Reads the size of `itemPath`, failing with `NoRegularFileException` when the path is
 * not a regular file. Returns a `Try` (like `itemExists`) so that the failure is explicit
 * in the signature instead of depending on the caller wrapping the call.
 */
private def getFileSize(itemId: ItemId, itemPath: Path): Try[Long] = Try {
  if (!Files.isRegularFile(itemPath))
    throw NoRegularFileException(itemId)
  Files.size(itemPath)
}
/*
 * GET /filesizes/:uuid/<path>
 *
 * Responds with the size of the file `<path>` in bag `:uuid`, as returned by
 * `bagStores.getSize` without a specific store.
 *
 *  - 200: the size, rendered as plain text
 *  - 400: the item id could not be parsed (IllegalArgumentException)
 *  - 404: the bag or the item was not found, or the item is not a regular file
 *  - 500: any other non-fatal failure (logged), or an unexpected splat value
 */
get("/filesizes/:uuid/*") {
  val uuidStr = params("uuid")
  multiParams("splat") match {
    case Seq(path) =>
      val itemIdStr = s"$uuidStr/$path"
      ItemId.fromString(itemIdStr)
        .recoverWith {
          case _: IllegalArgumentException => Failure(new IllegalArgumentException(s"invalid UUID string: $uuidStr"))
        }
        .flatMap(itemId => bagStores.getSize(itemId))
        // Render the number explicitly as text. A String body is always written verbatim,
        // whereas a raw numeric body depends on how Scalatra's render pipeline treats
        // numbers (NOTE(review): it special-cases some integral values as status codes - confirm).
        .map(size => Ok(body = size.toString))
        .getOrRecover {
          case e: IllegalArgumentException => BadRequest(e.getMessage)
          case e @ (_: NoSuchBagException | _: NoSuchItemException | _: NoRegularFileException) => NotFound(e.getMessage)
          case NonFatal(e) =>
            // say what was actually being retrieved; this route does not retrieve a bag
            logger.error(s"Error retrieving file size of item $itemIdStr", e)
            InternalServerError(s"[${ new DateTime() }] Unexpected type of failure. Please consult the logs")
        }
    case p =>
      logger.error(s"Unexpected path: $p")
      InternalServerError("Unexpected path")
  }
}
// Request header used by the filesizes tests that ask for a plain-text response.
private val acceptPlainText = Map("Accept" -> "text/plain")

"get filesizes/uuid/*" should "return filesize when file is found and is a regular file" in {
  val itemId = "01000000-0000-0000-0000-000000000001/data/sub/u"
  val uri = s"/filesizes/$itemId"

  get(uri, headers = acceptPlainText) {
    body shouldBe "12"
    status shouldBe 200
  }
}

it should "fail when the bag is not found" in {
  // a random UUID does not identify any bag in the test stores
  val uri = s"/filesizes/${ UUID.randomUUID() }/data/sub/u"

  get(uri) {
    status shouldBe 404
  }
}

it should "fail when only the bag id is given" in {
  val itemId = "01000000-0000-0000-0000-000000000001"
  val uri = s"/filesizes/$itemId"

  get(uri, headers = acceptPlainText) {
    status shouldBe 400
  }
}

it should "fail when the item is not a regular file" in {
  // data/sub is a directory in the test bag
  val itemId = "01000000-0000-0000-0000-000000000001/data/sub"
  val uri = s"/filesizes/$itemId"

  get(uri, headers = acceptPlainText) {
    status shouldBe 404
    body shouldBe s"Item $itemId is not a regular file."
  }
}

it should "fail when the file within a bag cannot be found" in {
  val itemId = "01000000-0000-0000-0000-000000000001/data/nonexistent"
  val uri = s"/filesizes/$itemId"

  get(uri, headers = acceptPlainText) {
    status shouldBe 404
    body shouldBe s"Item $itemId not found"
  }
}