From 74f6d612ea5cf86ffd23697fa44f6e454d39c973 Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Wed, 12 Jun 2024 18:05:45 +0100 Subject: [PATCH] simple services for ID lookup https://github.com/CatalogueOfLife/backend/issues/1318 --- .../life/catalogue/matching/DatasetIndex.java | 84 +++++++++++++++++++ .../life/catalogue/matching/ExternalID.java | 31 +++++++ .../life/catalogue/matching/IDController.java | 36 ++++++++ .../catalogue/matching/MatchController.java | 1 + .../catalogue/matching/MatchingService.java | 8 ++ 5 files changed, 160 insertions(+) create mode 100644 matching-ws/src/main/java/life/catalogue/matching/ExternalID.java create mode 100644 matching-ws/src/main/java/life/catalogue/matching/IDController.java diff --git a/matching-ws/src/main/java/life/catalogue/matching/DatasetIndex.java b/matching-ws/src/main/java/life/catalogue/matching/DatasetIndex.java index c1cda3651..12b592fb9 100644 --- a/matching-ws/src/main/java/life/catalogue/matching/DatasetIndex.java +++ b/matching-ws/src/main/java/life/catalogue/matching/DatasetIndex.java @@ -494,6 +494,90 @@ public static class IDMatchResult { boolean matchOnlyInJoinIndex; } + /** + * Matches an external ID. Intended for debug purposes only, to quickly + * check if ids are present and joined to main index or not. 
+ * + * @param identifier the identifier to match + * @return one {@link ExternalID} per identifier index that contains the identifier, built from the first hit; empty if nothing matches + */ + public List<ExternalID> lookupIdentifier(@NotNull String identifier) { + List<ExternalID> results = new ArrayList<>(); + + try { + // query every identifier (join) index, if any are loaded + if (identifierSearchers != null && !identifierSearchers.isEmpty()) { + for (Dataset dataset : identifierSearchers.keySet()) { + // exact term match on the ID field; only the first hit is used + IndexSearcher identifierSearcher = identifierSearchers.get(dataset); + Query identifierQuery = new TermQuery(new Term(FIELD_ID, identifier)); + TopDocs identifierDocs = identifierSearcher.search(identifierQuery, 3); + + if (identifierDocs.totalHits.value > 0) { + Document identifierDoc = identifierSearcher.storedFields().document(identifierDocs.scoreDocs[0].doc); + results.add(toExternalID(identifierDoc, dataset.getKey().toString())); + } + } + } + } catch (IOException e) { + log.error("Problem querying external ID indexes with {}", identifier, e); + } + // may be empty or partial: no index loaded, no hit, or an IOException was logged above + return results; + } + + /** + * Matches an external ID. Intended for debug purposes only, to quickly + * check if ids are present and joined to main index or not. 
+ * + * @param datasetID the dataset key or GBIF key selecting which identifier index to search + * @param identifier the identifier to match + * @return the first matching {@link ExternalID} from each identifier index whose dataset matches datasetID; empty if none + */ + public List<ExternalID> lookupIdentifier(@NotNull String datasetID, @NotNull String identifier) { + List<ExternalID> results = new ArrayList<>(); + + try { + // query the identifier (join) indexes, if any are loaded + if (identifierSearchers != null && !identifierSearchers.isEmpty()) { + for (Dataset dataset : identifierSearchers.keySet()) { + + // restrict the search to datasets whose key or GBIF key equals datasetID + if (dataset.getKey().toString().equals(datasetID) || (dataset.getGbifKey() != null && dataset.getGbifKey().equals(datasetID))) { + + // exact term match on the ID field; only the first hit is used + IndexSearcher identifierSearcher = identifierSearchers.get(dataset); + Query identifierQuery = new TermQuery(new Term(FIELD_ID, identifier)); + TopDocs identifierDocs = identifierSearcher.search(identifierQuery, 3); + + if (identifierDocs.totalHits.value > 0) { + Document identifierDoc = identifierSearcher.storedFields(). 
+ document(identifierDocs.scoreDocs[0].doc); + + results.add(toExternalID(identifierDoc, dataset.getKey().toString())); + } + } + } + } + } catch (IOException e) { + log.error("Problem querying external ID indexes with {}", identifier, e); + } + // may be empty or partial: no matching index, no hit, or an IOException was logged above + return results; + } + + private static ExternalID toExternalID(Document doc, String datasetKey) { + return ExternalID.builder() + .id(doc.get(FIELD_ID)) + .datasetKey(datasetKey) + .scientificName(doc.get(FIELD_SCIENTIFIC_NAME)) + .rank(doc.get(FIELD_RANK)) + .parentID(doc.get(FIELD_PARENT_ID)) + .status(doc.get(FIELD_STATUS)) + .mainIndexID(doc.get(FIELD_JOIN_ID)) + .build(); + } + /** * Matches an external ID * @param key the external ID to match diff --git a/matching-ws/src/main/java/life/catalogue/matching/ExternalID.java b/matching-ws/src/main/java/life/catalogue/matching/ExternalID.java new file mode 100644 index 000000000..9e56cf915 --- /dev/null +++ b/matching-ws/src/main/java/life/catalogue/matching/ExternalID.java @@ -0,0 +1,31 @@ +package life.catalogue.matching; + +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; +import com.fasterxml.jackson.annotation.JsonInclude; +import io.swagger.v3.oas.annotations.media.Schema; +import lombok.Builder; +import lombok.Data; + +@Schema(description = "An identifier from another checklist that may be associated with a name usage in the main index", title = "ExternalID", type = "object") +@Data +@JsonIgnoreProperties(ignoreUnknown = true) +@JsonInclude(JsonInclude.Include.NON_NULL) +@Builder +public class ExternalID { + @Schema(description = "The external identifier that may or may not have been associated with the main index") + private String id; + @Schema(description = "The main index identifier that the external identifier was matched to") + private String mainIndexID; + @Schema(description = "The dataset key of the main index identifier") + private String datasetKey; + @Schema(description = "The parent ID of the external identifier") + 
private String parentID; + @Schema(description = "The scientific name associated with the external identifier") + private String scientificName; + @Schema(description = "The accepted taxon ID of the external identifier") + private String acceptedTaxonID; + @Schema(description = "The rank of the external identifier") + private String rank; + @Schema(description = "The status of the external identifier") + private String status; +} diff --git a/matching-ws/src/main/java/life/catalogue/matching/IDController.java b/matching-ws/src/main/java/life/catalogue/matching/IDController.java new file mode 100644 index 000000000..404d95322 --- /dev/null +++ b/matching-ws/src/main/java/life/catalogue/matching/IDController.java @@ -0,0 +1,36 @@ +package life.catalogue.matching; + +import io.swagger.v3.oas.annotations.responses.ApiResponse; +import io.swagger.v3.oas.annotations.tags.Tag; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.web.bind.annotation.GetMapping; +import org.springframework.web.bind.annotation.PathVariable; +import org.springframework.web.bind.annotation.RestController; + +@RestController +public class IDController { + + @Autowired + MatchingService matchingService; + + @ApiResponse(responseCode = "200", description = "External identifiers found for the given dataset and identifier (empty list if none)") + @Tag(name = "ID lookup") + @GetMapping( + value = {"v2/id/{datasetId}/{identifier}"}, + produces = "application/json") + public Object matchV2( + @PathVariable(value = "datasetId") String datasetId, + @PathVariable(value = "identifier") String identifier) { + return matchingService.matchID(datasetId, identifier); + } + + @ApiResponse(responseCode = "200", description = "External identifiers found for the given identifier (empty list if none)") + @Tag(name = "ID lookup") + @GetMapping( + value = {"v2/id/{identifier}"}, + produces = "application/json") + public Object matchV2( + @PathVariable(value = "identifier") String identifier) { + return 
matchingService.matchID(identifier); + } +} diff --git a/matching-ws/src/main/java/life/catalogue/matching/MatchController.java b/matching-ws/src/main/java/life/catalogue/matching/MatchController.java index f35e2a467..cf397bcbc 100644 --- a/matching-ws/src/main/java/life/catalogue/matching/MatchController.java +++ b/matching-ws/src/main/java/life/catalogue/matching/MatchController.java @@ -30,6 +30,7 @@ import org.springframework.boot.web.servlet.error.ErrorController; import org.springframework.util.Assert; import org.springframework.web.bind.annotation.GetMapping; +import org.springframework.web.bind.annotation.PathVariable; import org.springframework.web.bind.annotation.RequestParam; import org.springframework.web.bind.annotation.RestController; import org.springframework.web.context.request.WebRequest; diff --git a/matching-ws/src/main/java/life/catalogue/matching/MatchingService.java b/matching-ws/src/main/java/life/catalogue/matching/MatchingService.java index bfb15cfdb..1d5c87544 100644 --- a/matching-ws/src/main/java/life/catalogue/matching/MatchingService.java +++ b/matching-ws/src/main/java/life/catalogue/matching/MatchingService.java @@ -233,6 +233,14 @@ private static void setAlternatives(NameUsageMatch match, List a match.setAlternatives(alts); } + public List<ExternalID> matchID(String identifier) { + return datasetIndex.lookupIdentifier(identifier); + } + + public List<ExternalID> matchID(String datasetID, String identifier) { + return datasetIndex.lookupIdentifier(datasetID, identifier); + } + public NameUsageMatch match( @Nullable String scientificName, @Nullable LinneanClassification classification,