From cebfa1c7cbd30bddf2fb99c2b4fe84b24fe65b80 Mon Sep 17 00:00:00 2001 From: Mark Patton Date: Mon, 14 Oct 2024 11:19:53 -0400 Subject: [PATCH] Modify DOI service to not return entries without a URL and tighten up the filename parsing. --- .../pass/doi/service/UnpaywallDoiService.java | 26 +++++++++++++++---- .../doi/service/UnpaywallDoiServiceTest.java | 5 ++-- 2 files changed, 23 insertions(+), 8 deletions(-) diff --git a/pass-core-doi-service/src/main/java/org/eclipse/pass/doi/service/UnpaywallDoiService.java b/pass-core-doi-service/src/main/java/org/eclipse/pass/doi/service/UnpaywallDoiService.java index 80f447a2..68143a08 100644 --- a/pass-core-doi-service/src/main/java/org/eclipse/pass/doi/service/UnpaywallDoiService.java +++ b/pass-core-doi-service/src/main/java/org/eclipse/pass/doi/service/UnpaywallDoiService.java @@ -16,6 +16,9 @@ */ package org.eclipse.pass.doi.service; +import java.io.File; +import java.net.URI; +import java.net.URISyntaxException; import java.util.HashMap; import javax.json.Json; import javax.json.JsonArray; @@ -57,6 +60,20 @@ public HashMap headerMap() { return null; } + private String get_filename(String url) { + try { + URI uri = new URI(url); + + if (uri.getPath() == null) { + return null; + } + + return new File(uri.getPath()).getName(); + } catch (URISyntaxException e) { + return null; + } + } + @Override public JsonObject processObject(JsonObject object) { JsonArray locations = object.getJsonArray("oa_locations"); @@ -67,14 +84,13 @@ public JsonObject processObject(JsonObject object) { JsonValue urlForPdf = manuscript.getValue("/url_for_pdf"); JsonValue isBest = manuscript.getValue("/is_best"); - JsonValue filename; if ( urlForPdf == JsonValue.NULL ) { - filename = JsonValue.NULL; - } else { - String urlForPdfString = urlForPdf.toString().replaceAll("\"",""); - filename = Json.createValue (urlForPdfString.substring(urlForPdfString.lastIndexOf('/') + 1)); + continue; } + String name = get_filename(manuscript.getString("url_for_pdf")); + JsonValue filename = name == null ? JsonValue.NULL : Json.createValue(name); + JsonValue repoInst = manuscript.getValue("/repository_institution"); JsonObject manuscriptObject = Json.createObjectBuilder().add("url", urlForPdf) diff --git a/pass-core-doi-service/src/test/java/org/eclipse/pass/doi/service/UnpaywallDoiServiceTest.java b/pass-core-doi-service/src/test/java/org/eclipse/pass/doi/service/UnpaywallDoiServiceTest.java index 5a6934f0..fc71c9df 100644 --- a/pass-core-doi-service/src/test/java/org/eclipse/pass/doi/service/UnpaywallDoiServiceTest.java +++ b/pass-core-doi-service/src/test/java/org/eclipse/pass/doi/service/UnpaywallDoiServiceTest.java @@ -31,9 +31,8 @@ public class UnpaywallDoiServiceTest { "\"repositoryLabel\":null,\"type\":\"application/pdf\",\"source\":\"Unpaywall\"," + "\"name\":\"CMC.S38446\",\"isBest\":true},{\"url\":\"https://europepmc.org/articles/pmc5072460?pdf=render\"," + "\"repositoryLabel\":\"PubMed Central - Europe PMC\",\"type\":\"application/pdf\"," + - "\"source\":\"Unpaywall\",\"name\":\"pmc5072460?pdf=render\",\"isBest\":false}," + - "{\"url\":null,\"repositoryLabel\":null,\"type\":\"application/pdf\"," + - "\"source\":\"Unpaywall\",\"name\":null,\"isBest\":false}]}"; + "\"source\":\"Unpaywall\",\"name\":\"pmc5072460\",\"isBest\":false}" + + "]}"; @Test public void testProcessObject() {