Skip to content

Commit

Permalink
Merge pull request #3498 from nulib/4095-ark-handling
Browse files Browse the repository at this point in the history
Make sure ARKs update properly
  • Loading branch information
mbklein authored Aug 22, 2023
2 parents d5d7b4f + 290e971 commit 27334b2
Show file tree
Hide file tree
Showing 16 changed files with 533 additions and 238 deletions.
2 changes: 1 addition & 1 deletion app/lib/meadow/application/children.ex
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ defmodule Meadow.Application.Children do
interval: Config.index_interval(), version: 2, name: Meadow.Data.IndexWorker.V2}
],
"database_listeners" => [
Meadow.ARKListener,
Meadow.ArkListener,
Meadow.FilesetDeleteListener,
Meadow.IIIF.ManifestListener,
Meadow.IndexDeleteListener,
Expand Down
91 changes: 82 additions & 9 deletions app/lib/meadow/ark.ex
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,12 @@ defmodule Meadow.Ark do

alias Meadow.Ark.{Client, Serializer}
alias Meadow.Config
alias Meadow.Data.Schemas.ArkCache
alias Meadow.Repo

import Ecto.Query

require Logger

defstruct ark: nil,
creator: nil,
Expand All @@ -13,7 +19,10 @@ defmodule Meadow.Ark do
publication_year: nil,
resource_type: nil,
status: nil,
target: nil
target: nil,
work_id: nil

@doc """
Build an ARK struct from a map or keyword list of attributes.

The attributes are first collected into a list and then handed to
`struct!/2`, so a key that is not a field of this struct raises.
"""
def from_attrs(attributes) do
  fields = Enum.into(attributes, [])
  struct!(__MODULE__, fields)
end

@doc """
Mint a new ARK identifier
Expand Down Expand Up @@ -66,7 +75,9 @@ defmodule Meadow.Ark do
case Client.post("/shoulder/#{shoulder}", Serializer.serialize(ark)) do
{:ok, %{status_code: status, body: body}} when status in 200..201 ->
new_id = Serializer.deserialize(body) |> Map.get(:ark)
{:ok, Map.put(ark, :ark, new_id)}
ark = Map.put(ark, :ark, new_id)
put_in_cache(ark)
{:ok, ark}

{:ok, %{body: body}} ->
{:error, body}
Expand Down Expand Up @@ -103,6 +114,25 @@ defmodule Meadow.Ark do
{:error, "error: bad request - no such identifier"}
"""
def get(id) do
  # Prefer the locally cached copy; only go to the source on a cache miss.
  cached = get_from_cache(id)

  if is_nil(cached) do
    # A successful lookup is written to the cache before being returned.
    # Anything other than `{:ok, ark}` is passed back to the caller untouched.
    with {:ok, ark} = found <- get_from_source(id) do
      put_in_cache(ark)
      found
    end
  else
    {:ok, cached}
  end
end

def get_from_source(id) do
Logger.debug("Retrieving ark #{id} from source")

case Client.get("/id/#{id}") do
{:ok, %{status_code: 200, body: body}} -> {:ok, Serializer.deserialize(body)}
{:ok, %{body: body}} -> {:error, body}
Expand Down Expand Up @@ -135,15 +165,19 @@ defmodule Meadow.Ark do

def put(%__MODULE__{} = ark) do
case Client.put("/id/#{ark.ark}?update_if_exists=yes", Serializer.serialize(ark)) do
{:ok, %{status_code: status}} when status in 200..201 -> {:ok, ark}
{:ok, %{body: body}} -> {:error, body}
{:error, error} -> {:error, error}
{:ok, %{status_code: status}} when status in 200..201 ->
put_in_cache(ark)
{:ok, ark}

{:ok, %{body: body}} ->
{:error, body}

{:error, error} ->
{:error, error}
end
end

def put(attributes) do
put(struct!(__MODULE__, Enum.into(attributes, [])))
end
def put(attributes), do: from_attrs(attributes) |> put()

@doc """
Remove the ARK identifier
Expand All @@ -158,7 +192,46 @@ defmodule Meadow.Ark do
"""
def delete(id) do
case Client.delete("/id/#{id}") do
{:ok, %{status_code: 200, body: body}} -> {:ok, Serializer.deserialize(body)}
{:ok, %{status_code: 200, body: body}} ->
delete_from_cache(id)
{:ok, Serializer.deserialize(body)}

other ->
other
end
end

@doc """
Compute the MD5 hash of an ARK's serialized representation.

Accepts either an ARK struct or any attributes accepted by `from_attrs/1`,
which are converted to a struct first.
"""
def digest(%__MODULE__{} = ark) do
  serialized = Serializer.serialize(ark)
  :crypto.hash(:md5, serialized)
end

def digest(attributes) do
  attributes
  |> from_attrs()
  |> digest()
end

# Remove every row from the ARK cache table.
def clear_cache do
  Repo.delete_all(ArkCache)
end

# Delete the cache row(s) whose `ark` column equals `id`.
def delete_from_cache(id) do
  matching = from(entry in ArkCache, where: entry.ark == ^id)
  Repo.delete_all(matching)
end

# Look up `id` in the ARK cache table and convert the row (or `nil` when no
# row matches) with `from_cache/1`.
def get_from_cache(id) do
  Logger.debug("Retrieving ark #{id} from cache")

  lookup = from(entry in ArkCache, where: entry.ark == ^id)
  cached = Repo.one(lookup)
  from_cache(cached)
end

# Write `ark` to the cache: the existing row for `ark.ark` is loaded (if any)
# and a changeset built from the struct's fields is inserted or updated.
# Returns whatever `Repo.insert_or_update/1` returns.
def put_in_cache(ark) do
  lookup = from(entry in ArkCache, where: entry.ark == ^ark.ark)
  existing = Repo.one(lookup)
  attrs = Map.from_struct(ark)

  existing
  |> ArkCache.changeset(attrs)
  |> Repo.insert_or_update()
end

# Convert a cache row into an ARK struct; a missing row (`nil`) stays `nil`.
def from_cache(nil), do: nil

def from_cache(%ArkCache{} = cache) do
  # Only string-valued fields are carried over, which drops `nil` values and
  # any non-string entries left in the map after `Map.from_struct/1`.
  string_fields =
    cache
    |> Map.from_struct()
    |> Enum.filter(fn {_field, value} -> is_binary(value) end)

  from_attrs(string_fields)
end
end
4 changes: 3 additions & 1 deletion app/lib/meadow/ark/serializer.ex
Original file line number Diff line number Diff line change
Expand Up @@ -39,11 +39,13 @@ defmodule Meadow.Ark.Serializer do
Enum.reduce(ark, ["_profile: datacite"], fn
{_, nil}, acc -> acc
{:ark, _}, acc -> acc
{:work_id, _}, acc -> acc
entry, acc -> [serialize(entry) | acc]
end)
|> Enum.reverse()
|> Enum.join("\n")
end

def serialize({key, value}) when is_atom(key), do: Map.get(@datacite_map, key) <> ": " <> String.replace(value, "%", "%25")
def serialize({key, value}) when is_atom(key),
do: Map.get(@datacite_map, key) <> ": " <> String.replace(value, "%", "%25")
end
14 changes: 10 additions & 4 deletions app/lib/meadow/ark_listener.ex
Original file line number Diff line number Diff line change
@@ -1,20 +1,23 @@
defmodule Meadow.ARKListener do
defmodule Meadow.ArkListener do
@moduledoc """
Listens to INSERTS/UPDATES on Postgrex.Notifications topic "works_changed" and
updates ARK metadata
"""

use Meadow.DatabaseNotification, tables: [:works]
use Meadow.Utils.Logging
alias Meadow.Arks
alias Meadow.Data.Works
require Logger

@impl true
def handle_notification(:works, :delete, %{id: _id}, state) do
def handle_notification(:works, :delete, %{id: id}, state) do
Arks.work_deleted(id)
{:noreply, state}
end

def handle_notification(:works, :insert, %{id: _id}, state) do
def handle_notification(:works, :insert, %{id: id}, state) do
Works.get_work(id) |> Arks.mint_ark()
{:noreply, state}
end

Expand All @@ -38,7 +41,10 @@ defmodule Meadow.ARKListener do
"Updating ARK metadata for work: #{work.id}, with ark: #{work.descriptive_metadata.ark}"
)

case Works.update_ark_metatdata(work) do
case Arks.update_ark_metatdata(work) do
:noop ->
:noop

{:ok, _result} ->
:noop

Expand Down
36 changes: 36 additions & 0 deletions app/lib/meadow/data/schemas/ark_cache.ex
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
defmodule Meadow.Data.Schemas.ArkCache do
  @moduledoc """
  Ecto schema for the `ark_cache` table, which stores cached ARK records
  keyed by the ARK string itself.
  """
  use Ecto.Schema
  import Ecto.Changeset

  # Every column that may be set through `changeset/2`, including the key.
  @castable_fields [
    :ark,
    :creator,
    :title,
    :publisher,
    :publication_year,
    :resource_type,
    :status,
    :target,
    :work_id
  ]

  @primary_key {:ark, :string, autogenerate: false, read_after_writes: true}
  schema "ark_cache" do
    field(:creator, :string)
    field(:title, :string)
    field(:publisher, :string)
    field(:publication_year, :string)
    field(:resource_type, :string)
    field(:status, :string)
    field(:target, :string)
    field(:work_id, Ecto.UUID)
  end

  # Build a changeset that casts `params` onto `ark`. When `ark` is omitted
  # or `nil`, a fresh empty struct is used as the base.
  def changeset(ark \\ %__MODULE__{}, params)

  def changeset(nil, params), do: changeset(%__MODULE__{}, params)

  def changeset(ark, params), do: cast(ark, params, @castable_fields)
end
4 changes: 3 additions & 1 deletion app/lib/meadow/data/schemas/work_descriptive_metadata.ex
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ defmodule Meadow.Data.Schemas.WorkDescriptiveMetadata do

import Ecto.Changeset
use Ecto.Schema
alias Meadow.Data.Schemas.{ControlledMetadataEntry, NoteEntry, RelatedURLEntry}
alias Meadow.Data.Schemas.{ArkCache, ControlledMetadataEntry, NoteEntry, RelatedURLEntry}
alias Meadow.Data.Types

# {field_name, repeating}
Expand Down Expand Up @@ -85,6 +85,8 @@ defmodule Meadow.Data.Schemas.WorkDescriptiveMetadata do
embeds_many(:notes, NoteEntry, on_replace: :delete)
embeds_many(:related_url, RelatedURLEntry, on_replace: :delete)

belongs_to(:cached_ark, ArkCache, foreign_key: :ark, references: :ark, define_field: false)

timestamps()
end

Expand Down
Loading

0 comments on commit 27334b2

Please sign in to comment.