Skip to content

Commit

Permalink
Merge pull request #4114 from nulib/5132-escape-anvl
Browse files Browse the repository at this point in the history
Escape colons in Ark requests
  • Loading branch information
mbklein authored Aug 21, 2024
2 parents ea16769 + 58e33ae commit e4fa42d
Show file tree
Hide file tree
Showing 6 changed files with 58 additions and 35 deletions.
15 changes: 13 additions & 2 deletions app/lib/meadow/ark/serializer.ex
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,17 @@ defmodule Meadow.Ark.Serializer do
|> Enum.join("\n")
end

def serialize({key, value}) when is_atom(key),
do: Map.get(@datacite_map, key) <> ": " <> String.replace(value, "%", "%25")
# Serializes a single `{key, value}` pair into one "label: value" line.
#
# The label is looked up in @datacite_map by `key`; the value is
# percent-encoded so the characters listed in `escapable` never appear raw.
# Only atom keys are accepted (see the guard).
def serialize({key, value}) when is_atom(key) do
# Characters that must be percent-encoded in the value: "%", CR and LF always,
# plus ":" for every key except :target.
# NOTE(review): presumably the colon is kept for :target because it holds a
# URL whose "scheme:" prefix must stay intact — confirm against the ARK
# service's escaping rules.
escapable =
case key do
:target -> "%\r\n"
_ -> ":%\r\n"
end

[
Map.get(@datacite_map, key),
# URI.encode/2 leaves a character unescaped only when the predicate returns
# true, so every character found in `escapable` is emitted as %XX.
URI.encode(value, fn c -> not String.contains?(escapable, <<c>>) end)
]
|> Enum.join(": ")
end
end
6 changes: 3 additions & 3 deletions app/lib/meadow/search/bulk.ex
Original file line number Diff line number Diff line change
Expand Up @@ -40,16 +40,16 @@ defmodule Meadow.Search.Bulk do
defp upload_batch(docs, index) do
with_log_metadata module: __MODULE__, index: index do
bulk_document = docs |> Enum.join("\n")

Logger.info("Uploading batch of #{Enum.count(docs)} documents to #{index}")

case HTTP.post("/#{index}/_bulk", bulk_document <> "\n") do
{:ok, %{status_code: status} = response} ->
Logger.info("Bulk upload status: #{status}")
{:ok, response}

{:retry, response} ->
Logger.warn("Bulk upload retrying")
Logger.warning("Bulk upload retrying")
{:retry, response}

{:error, error} ->
Expand Down
2 changes: 0 additions & 2 deletions app/lib/meadow/search/config.ex
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,6 @@ defmodule Meadow.Search.Config do
@moduledoc """
Convenience methods for retrieving search-specific configuration
"""
alias Meadow.Search.HTTP

require Logger

def index_configs do
Expand Down
2 changes: 1 addition & 1 deletion app/lib/meadow/utils/arks.ex
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ defmodule Meadow.Arks do
"""
def mint_ark(%Work{descriptive_metadata: %{ark: ark}} = work)
when not is_nil(ark) do
Logger.warn("Not minting ARK for work #{work.id} because it already has one: #{ark}")
Logger.warning("Not minting ARK for work #{work.id} because it already has one: #{ark}")
{:noop, work}
end

Expand Down
14 changes: 0 additions & 14 deletions app/lib/meadow_web/resolvers/data.ex
Original file line number Diff line number Diff line change
Expand Up @@ -164,20 +164,6 @@ defmodule MeadowWeb.Resolvers.Data do
end
end

def replace_file_set(_, %{id: id} = params, _) do
file_set = FileSets.get_file_set!(id)

case Pipeline.replace_the_file_set(file_set, Map.delete(params, :id)) do
{:error, changeset} ->
{:error,
message: "Could not replace file set",
details: ChangesetErrors.humanize_errors(changeset)}

{:ok, file_set} ->
{:ok, file_set}
end
end

def update_file_set(_, %{id: id} = params, _) do
file_set = FileSets.get_file_set!(id)

Expand Down
54 changes: 41 additions & 13 deletions app/test/meadow/ark/serializer_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -3,37 +3,65 @@ defmodule Meadow.Ark.SerializerTest do

alias Meadow.Ark.Serializer

@response_body "success: ark:/99999/fk4z90ps4x\n_updated: 1630613597\ndatacite.publisher: Test publisher\n_profile: datacite\ndatacite.title: Test title\n_export: yes\ndatacite.creator: Test creator\n_owner: apitest\n_ownergroup: apitest\n_target: https://test/items/123\n_created: 1630613597\ndatacite.publicationyear: 2021\ndatacite.resourcetype: Image\n_status: public\n"
@request_payload """
_profile: datacite
datacite.creator: Test %25 creator
datacite.publicationyear: 2021
datacite.publisher: Publisher%3A Test
datacite.resourcetype: Image
_status: public
_target: https://test/items/123
datacite.title: 100%25
"""

describe "serialize/1" do
test "desconstructs a Meadow.Ark and properly handles ANVL escaping of % characters" do
ark = %Meadow.Ark{
ark: "ark:/99999/fk4z90ps4x",
creator: "Test % creator",
publication_year: "2021",
publisher: "%Test publisher%",
publisher: "Publisher: Test",
resource_type: "Image",
status: "public",
target: "https://test/items/123",
title: "100%"
}

assert Serializer.serialize(ark) == "_profile: datacite\ndatacite.creator: Test %25 creator\ndatacite.publicationyear: 2021\ndatacite.publisher: %25Test publisher%25\ndatacite.resourcetype: Image\n_status: public\n_target: https://test/items/123\ndatacite.title: 100%25"
assert Serializer.serialize(ark) == String.trim(@request_payload)
end
end

@response_body """
success: ark:/99999/fk4z90ps4x
_updated: 1630613597
datacite.publisher: Test publisher
_profile: datacite
datacite.title: Test title
_export: yes
datacite.creator: Test creator
_owner: apitest
_ownergroup: apitest
_target: https://test/items/123
_created: 1630613597
datacite.publicationyear: 2021
datacite.resourcetype: Image
_status: public
"""

describe "deserialize/1" do
test "builds a Meadow.Ark struct" do
assert %Meadow.Ark{
ark: "ark:/99999/fk4z90ps4x",
creator: "Test creator",
publication_year: "2021",
publisher: "Test publisher",
resource_type: "Image",
status: "public",
target: "https://test/items/123",
title: "Test title"
} = Serializer.deserialize(@response_body)
expected = %Meadow.Ark{
ark: "ark:/99999/fk4z90ps4x",
creator: "Test creator",
publication_year: "2021",
publisher: "Test publisher",
resource_type: "Image",
status: "public",
target: "https://test/items/123",
title: "Test title"
}

assert Serializer.deserialize(@response_body) == expected
end
end
end

0 comments on commit e4fa42d

Please sign in to comment.