Skip to content

Commit

Permalink
File set version API
Browse files Browse the repository at this point in the history
  • Loading branch information
kdid committed Aug 7, 2023
1 parent 9961dec commit aef64bf
Show file tree
Hide file tree
Showing 16 changed files with 247 additions and 69 deletions.
2 changes: 1 addition & 1 deletion app/assets/package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion app/config/test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,8 @@ config :meadow,
"api_token_ttl" => 300,
"base_url" => "http://dcapi-test.northwestern.edu"
}
]
],
iiif_distribution_id: nil

if System.get_env("AWS_DEV_ENVIRONMENT") |> is_nil() do
[:mediaconvert, :s3, :secretsmanager, :sns, :sqs]
Expand Down
2 changes: 1 addition & 1 deletion app/lib/meadow/config.ex
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ defmodule Meadow.Config do

@doc "Retrieve the IIIF cloudfront distribution id"
def iiif_cloudfront_distribution_id do
Application.get_env(:meadow, :iiif_cloudfront_distribution_id)
Application.get_env(:meadow, :iiif_distribution_id)
end

@doc "Retrieve the IIIF server endpoint"
Expand Down
18 changes: 18 additions & 0 deletions app/lib/meadow/data/file_sets.ex
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,24 @@ defmodule Meadow.Data.FileSets do
Repo.delete(file_set)
end

@doc """
Replaces (versions) a FileSet.

Builds an update changeset from `attrs`, runs the poster offset validation
against the existing `file_set`, and persists it with `Repo.update/1`.
When the update succeeds, `post_process/1` is invoked with the changeset.

Returns the result of `Repo.update/1` unchanged.
"""
def replace_file_set(%FileSet{} = file_set, attrs) do
  changeset =
    file_set
    |> FileSet.update_changeset(attrs)
    |> validate_poster_offset(file_set)

  result = Repo.update(changeset)

  # Post-processing is a side effect only; its return value is intentionally
  # ignored so callers always see the Repo result.
  with {:ok, _updated} <- result do
    post_process(changeset)
  end

  result
end

@doc """
Updates a FileSet.
"""
Expand Down
23 changes: 12 additions & 11 deletions app/lib/meadow/data/schemas/file_set.ex
Original file line number Diff line number Diff line change
Expand Up @@ -18,25 +18,26 @@ defmodule Meadow.Data.Schemas.FileSet do
@foreign_key_type Ecto.UUID
@timestamps_opts [type: :utc_datetime_usec]
schema "file_sets" do
field :accession_number
field :extracted_metadata, :map
field :role, Types.CodedTerm
field :rank, :integer
field :position, :any, virtual: true
field :derivatives, :map
field :poster_offset, :integer
field(:accession_number)
field(:extracted_metadata, :map)
field(:role, Types.CodedTerm)
field(:rank, :integer)
field(:position, :any, virtual: true)
field(:derivatives, :map)
field(:poster_offset, :integer)
field(:reindex_at, :utc_datetime_usec)

embeds_one :core_metadata, FileSetCoreMetadata, on_replace: :update
embeds_one :structural_metadata, FileSetStructuralMetadata, on_replace: :delete
embeds_one(:core_metadata, FileSetCoreMetadata, on_replace: :update)
embeds_one(:structural_metadata, FileSetStructuralMetadata, on_replace: :delete)
timestamps()

belongs_to :work, Work
belongs_to(:work, Work)

has_many :action_states, ActionState,
has_many(:action_states, ActionState,
references: :id,
foreign_key: :object_id,
on_delete: :delete_all
)
end

defp changeset_params do
Expand Down
15 changes: 13 additions & 2 deletions app/lib/meadow/pipeline.ex
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,17 @@ defmodule Meadow.Pipeline do
end
end

@doc """
Replaces a file set via `FileSets.replace_file_set/2` and, on success, starts
a task that runs `wait_for_checksum_tags/2` with a `"Version"` context.

Returns `{:ok, file_set}` on success or `{:error, changeset}` on failure.
"""
def replace_the_file_set(file_set, attrs \\ %{}) do
  case FileSets.replace_file_set(file_set, attrs) do
    {:error, changeset} ->
      {:error, changeset}

    {:ok, replaced} ->
      # NOTE(review): the task started here is never awaited in this function;
      # confirm that a linked, un-awaited Task.async is what is intended.
      Task.async(fn -> wait_for_checksum_tags(replaced, %{context: "Version"}) end)
      {:ok, replaced}
  end
end

def kickoff(_, context \\ %{})

def kickoff(%FileSet{} = file_set, context), do: kickoff(file_set.id, context)
Expand All @@ -48,14 +59,14 @@ defmodule Meadow.Pipeline do
end
end

defp wait_for_checksum_tags(%{core_metadata: %{location: location}} = file_set) do
defp wait_for_checksum_tags(%{core_metadata: %{location: location}} = file_set, context \\ %{}) do
with %{host: bucket, path: "/" <> key} <- URI.parse(location) do
case wait(AWS.check_object_tags!(bucket, key, Config.required_checksum_tags()),
timeout: Config.checksum_wait_timeout(),
frequency: 1_000
) do
{:ok, true} ->
kickoff(file_set, %{role: file_set.role.id})
kickoff(file_set, Map.merge(context, %{role: file_set.role.id}))

{:timeout, timeout} ->
Logger.error(
Expand Down
9 changes: 7 additions & 2 deletions app/lib/meadow/pipeline/actions/create_pyramid_tiff.ex
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ defmodule Meadow.Pipeline.Actions.CreatePyramidTiff do
alias Meadow.Config
alias Meadow.Data.{ActionStates, FileSets}
alias Meadow.Repo
alias Meadow.Utils.Lambda
alias Meadow.Utils.{AWS, Lambda}
use Meadow.Pipeline.Actions.Common

@timeout 240_000
Expand All @@ -16,7 +16,7 @@ defmodule Meadow.Pipeline.Actions.CreatePyramidTiff do
|> Meadow.Utils.Stream.exists?()
end

def process(file_set, _) do
def process(file_set, attributes) do
source = file_set.core_metadata.location
target = FileSets.pyramid_uri_for(file_set.id)

Expand All @@ -28,6 +28,10 @@ defmodule Meadow.Pipeline.Actions.CreatePyramidTiff do
ActionStates.set_state!(file_set, __MODULE__, "ok")
end)

with %{context: "Version"} <- attributes do
AWS.invalidate_cache(file_set, :pyramid)
end

:ok

{:error, {:http_error, status, message}} ->
Expand All @@ -50,4 +54,5 @@ defmodule Meadow.Pipeline.Actions.CreatePyramidTiff do
Logger.error("Invalid location: #{source}")
{:error, "Invalid location: #{source}"}
end

end
49 changes: 2 additions & 47 deletions app/lib/meadow/pipeline/actions/generate_poster_image.ex
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ defmodule Meadow.Pipeline.Actions.GeneratePosterImage do
alias Meadow.Data.FileSets
alias Meadow.Data.Schemas.FileSet
alias Meadow.Repo
alias Meadow.Utils.Lambda
alias Meadow.Utils.{AWS, Lambda}

use Meadow.Pipeline.Actions.Common

Expand Down Expand Up @@ -49,8 +49,7 @@ defmodule Meadow.Pipeline.Actions.GeneratePosterImage do
FileSets.update_file_set(file_set, %{derivatives: derivatives})
end)

iiif_cloudfront_distribution_id = Config.iiif_cloudfront_distribution_id()
invalidate_cache(file_set, iiif_cloudfront_distribution_id)
AWS.invalidate_cache(file_set, :poster)
end

defp handle_generate_poster_result({:error, error}, _file_set, destination) do
Expand All @@ -69,48 +68,4 @@ defmodule Meadow.Pipeline.Actions.GeneratePosterImage do
@timeout
)
end

# Without a distribution id there is nothing to invalidate: log and move on.
defp invalidate_cache(file_set, nil) do
  Logger.info(
    "Skipping poster cache invalidation for file set: #{file_set.id}. No distribution id found."
  )

  :ok
end

# Sends a CloudFront `create_invalidation` request covering every poster path
# of the given file set. Always returns `:ok`; a failed request is only logged
# so that poster generation is not aborted by a cache problem.
defp invalidate_cache(file_set, distribution_id) do
  version = "2020-05-31"
  caller_reference = "meadow-app-#{Ecto.UUID.generate()}"
  path = "/iiif/2/posters/#{file_set.id}/*"

  data = """
  <?xml version="1.0" encoding="UTF-8"?>
  <InvalidationBatch xmlns="http://cloudfront.amazonaws.com/doc/#{version}/">
  <CallerReference>#{caller_reference}</CallerReference>
  <Paths>
  <Items>
  <Path>#{path}</Path>
  </Items>
  <Quantity>1</Quantity>
  </Paths>
  </InvalidationBatch>
  """

  operation = %ExAws.Operation.RestQuery{
    action: :create_invalidation,
    body: data,
    http_method: :post,
    path: "/#{version}/distribution/#{distribution_id}/invalidation",
    service: :cloudfront
  }

  case ExAws.request(operation) do
    # The response is a map carrying :status_code. The previous pattern
    # (`{:ok, status_code: status_code}`) matched a keyword list and therefore
    # never succeeded, so every invalidation was reported as a failure.
    {:ok, %{status_code: status_code}} when status_code in 200..299 ->
      :ok

    _ ->
      Logger.error("Unable to clear poster cache for #{path}")
      :ok
  end
end
end
58 changes: 58 additions & 0 deletions app/lib/meadow/utils/aws.ex
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,15 @@ defmodule Meadow.Utils.AWS do
@moduledoc """
Utility functions for AWS requests and object management
"""
alias Meadow.Config
alias Meadow.Error
alias Meadow.Utils.AWS.MultipartCopy

import Env
import SweetXml, only: [sigil_x: 2]

require Logger

@doc """
Drop-in replacement for ExAws.request/2 that reports errors to Honeybadger
"""
Expand Down Expand Up @@ -74,6 +78,60 @@ defmodule Meadow.Utils.AWS do
def copy_object(dest_bucket, dest_object, src_bucket, src_object, opts \\ []),
do: MultipartCopy.copy_object(dest_bucket, dest_object, src_bucket, src_object, opts)

@doc """
Invalidates cached IIIF derivatives for `file_set`.

`invalidation_type` is `:pyramid` or `:poster` and selects which path is
invalidated. The two-arity form uses `Config.environment()`; in `:dev` and
`:test` the path additionally contains `prefix()`.
"""
def invalidate_cache(file_set, invalidation_type),
  do: invalidate_cache(file_set, invalidation_type, Config.environment())

def invalidate_cache(file_set, :pyramid, env) when env in [:dev, :test],
  do: perform_invalidation("/iiif/2/#{prefix()}/#{file_set.id}/*")

def invalidate_cache(file_set, :pyramid, _env),
  do: perform_invalidation("/iiif/2/#{file_set.id}/*")

def invalidate_cache(file_set, :poster, env) when env in [:dev, :test],
  do: perform_invalidation("/iiif/2/#{prefix()}/posters/#{file_set.id}/*")

# The separator after "posters" was missing here, which produced paths such as
# "/iiif/2/posters<id>/*" that can never match a cached poster.
def invalidate_cache(file_set, :poster, _env),
  do: perform_invalidation("/iiif/2/posters/#{file_set.id}/*")

# Looks up the configured distribution id and dispatches to the clauses below.
defp perform_invalidation(path),
  do: perform_invalidation(path, Config.iiif_cloudfront_distribution_id())

# No distribution id configured: log and report success without any request.
defp perform_invalidation(path, nil) do
  Logger.info("Skipping poster cache invalidation for: #{path}. No distribution id found.")
  :ok
end

# Posts a CloudFront invalidation batch containing the single `path`.
# Always returns `:ok`; a non-2xx or error response is only logged.
defp perform_invalidation(path, distribution_id) do
  api_version = "2020-05-31"
  reference = "meadow-app-#{Ecto.UUID.generate()}"

  payload = """
  <?xml version="1.0" encoding="UTF-8"?>
  <InvalidationBatch xmlns="http://cloudfront.amazonaws.com/doc/#{api_version}/">
  <CallerReference>#{reference}</CallerReference>
  <Paths>
  <Items>
  <Path>#{path}</Path>
  </Items>
  <Quantity>1</Quantity>
  </Paths>
  </InvalidationBatch>
  """

  request = %ExAws.Operation.RestQuery{
    service: :cloudfront,
    http_method: :post,
    action: :create_invalidation,
    path: "/#{api_version}/distribution/#{distribution_id}/invalidation",
    body: payload
  }

  with {:ok, %{status_code: code}} when code in 200..299 <- ExAws.request(request) do
    :ok
  else
    _ ->
      Logger.error("Unable to clear poster cache for #{path}")
      :ok
  end
end

defp generate_aws_signature(request, region, access_key, secret) do
now = NaiveDateTime.utc_now() |> NaiveDateTime.truncate(:second)

Expand Down
14 changes: 14 additions & 0 deletions app/lib/meadow_web/resolvers/data.ex
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,20 @@ defmodule MeadowWeb.Resolvers.Data do
end
end

# Resolver for the replace-file-set mutation: loads the file set by id, hands
# the remaining params to the pipeline, and turns a failed changeset into an
# error with humanized details.
def replace_file_set(_, %{id: id} = params, _) do
  attrs = Map.delete(params, :id)

  id
  |> FileSets.get_file_set!()
  |> Pipeline.replace_the_file_set(attrs)
  |> case do
    {:ok, replaced} ->
      {:ok, replaced}

    {:error, changeset} ->
      {:error,
       message: "Could not replace file set",
       details: ChangesetErrors.humanize_errors(changeset)}
  end
end

def update_file_set(_, %{id: id} = params, _) do
file_set = FileSets.get_file_set!(id)

Expand Down
16 changes: 13 additions & 3 deletions app/lib/meadow_web/schema/types/data/file_set_types.ex
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,15 @@ defmodule MeadowWeb.Schema.Data.FileSetTypes do
resolve(&Resolvers.Data.update_file_set/3)
end

# Mutation field: takes the file set id and a new core_metadata input, and is
# resolved by Resolvers.Data.replace_file_set/3.
# NOTE(review): "Editor" is passed to Middleware.Authorize — presumably the
# role required to call this mutation; confirm against the middleware.
@desc "Replace file set (create new version)"
field :replace_file_set, :file_set do
arg(:id, non_null(:id))
arg(:core_metadata, non_null(:file_set_core_metadata_input))
middleware(Middleware.Authenticate)
middleware(Middleware.Authorize, "Editor")
resolve(&Resolvers.Data.replace_file_set/3)
end

@desc "Update metadata for a list of fileSets"
field :update_file_sets, list_of(:file_set) do
arg(:file_sets, non_null(list_of(:file_set_update)))
Expand Down Expand Up @@ -153,9 +162,10 @@ defmodule MeadowWeb.Schema.Data.FileSetTypes do

@desc "`digests` represents the possible digest hashes for a file set."
object :digests do
field :md5, :string, do: resolve(fn digests, _, _ -> {:ok, Map.get(digests, "md5")} end)
field :sha1, :string, do: resolve(fn digests, _, _ -> {:ok, Map.get(digests, "sha1")} end)
field :sha256, :string, do: resolve(fn digests, _, _ -> {:ok, Map.get(digests, "sha256")} end)
field(:md5, :string, do: resolve(fn digests, _, _ -> {:ok, Map.get(digests, "md5")} end))
field(:sha1, :string, do: resolve(fn digests, _, _ -> {:ok, Map.get(digests, "sha1")} end))

field(:sha256, :string, do: resolve(fn digests, _, _ -> {:ok, Map.get(digests, "sha256")} end))
end

@desc "`file_set_structural_metadata` represents the structural metadata within a file set object."
Expand Down
13 changes: 13 additions & 0 deletions app/test/gql/ReplaceFileSet.gql
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#import "./FileSetFields.frag.gql"

mutation(
$id: ID!
$coreMetadata: FileSetCoreMetadataInput!
) {
replaceFileSet(
id: $id
coreMetadata: $coreMetadata
) {
...FileSetFields
}
}
15 changes: 15 additions & 0 deletions app/test/meadow/data/file_sets_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,21 @@ defmodule Meadow.Data.FileSetsTest do
}
end

# The function under test takes the file set and the attrs, so its arity is 2.
test "replace_file_set/2 with new location updates file_set" do
  original = file_set_fixture()

  replace_attrs = %{
    id: original.id,
    core_metadata: %{
      location: "https://example.com",
      original_filename: "test.tiff"
    }
  }

  # Bind the result to a new name instead of rebinding the fixture variable.
  assert {:ok, %FileSet{} = replaced} = FileSets.replace_file_set(original, replace_attrs)
  assert replaced.core_metadata.location == "https://example.com"
end

test "get_file_set!/1 returns a file set by id" do
file_set = file_set_fixture()
assert FileSets.get_file_set!(file_set.id) == file_set
Expand Down
Loading

0 comments on commit aef64bf

Please sign in to comment.