Skip to content

Commit

Permalink
simple services for ID lookup #1318
Browse files Browse the repository at this point in the history
  • Loading branch information
djtfmartin committed Jun 27, 2024
1 parent 0823628 commit 74f6d61
Show file tree
Hide file tree
Showing 5 changed files with 160 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -494,6 +494,90 @@ public static class IDMatchResult {
boolean matchOnlyInJoinIndex;
}

/**
 * Looks up an external identifier in every loaded identifier (join) index.
 * Intended for debug purposes only, to quickly check if ids are present
 * and joined to the main index or not.
 *
 * <p>Each dataset index is queried independently with an exact term match on the
 * id field, and only the top hit of each index is reported. An I/O failure on one
 * index is logged and that index is skipped, so the remaining indexes are still
 * searched.
 *
 * @param identifier the identifier to match
 * @return one {@link ExternalID} per dataset index that contains the identifier;
 *     an empty list (never {@code null}) when no identifier indexes are loaded or
 *     nothing matched
 */
public List<ExternalID> lookupIdentifier(@NotNull String identifier) {
  List<ExternalID> results = new ArrayList<>();

  // no join indexes loaded, so there is nothing to search
  if (identifierSearchers == null || identifierSearchers.isEmpty()) {
    return results;
  }

  // the query does not depend on the dataset, so build it once for all indexes
  Query identifierQuery = new TermQuery(new Term(FIELD_ID, identifier));

  for (Dataset dataset : identifierSearchers.keySet()) {
    // keep the try inside the loop: one broken index must not hide hits from the others
    try {
      IndexSearcher identifierSearcher = identifierSearchers.get(dataset);
      TopDocs identifierDocs = identifierSearcher.search(identifierQuery, 3);

      if (identifierDocs.totalHits.value > 0) {
        // only the best-ranked document of each dataset index is reported
        Document identifierDoc =
            identifierSearcher.storedFields().document(identifierDocs.scoreDocs[0].doc);
        results.add(toExternalID(identifierDoc, dataset.getKey().toString()));
      }
    } catch (IOException e) {
      log.error("Problem querying external ID indexes with {}", identifier, e);
    }
  }
  return results;
}

/**
 * Looks up an external identifier in the identifier (join) index of a single dataset.
 * Intended for debug purposes only, to quickly check if ids are present
 * and joined to the main index or not.
 *
 * <p>A dataset is selected when {@code datasetID} equals either the string form of
 * its key or its GBIF key. Every selected index is queried with an exact term match
 * on the id field, and only the top hit of each index is reported. An I/O failure on
 * one index is logged and that index is skipped.
 *
 * @param datasetID the dataset key (or GBIF key) selecting which identifier index to search
 * @param identifier the identifier to match
 * @return one {@link ExternalID} per selected dataset index that contains the identifier;
 *     an empty list (never {@code null}) when no identifier indexes are loaded, no dataset
 *     matches {@code datasetID}, or nothing matched
 */
public List<ExternalID> lookupIdentifier(@NotNull String datasetID, @NotNull String identifier) {
  List<ExternalID> results = new ArrayList<>();

  // no join indexes loaded, so there is nothing to search
  if (identifierSearchers == null || identifierSearchers.isEmpty()) {
    return results;
  }

  // the query does not depend on the dataset, so build it once
  Query identifierQuery = new TermQuery(new Term(FIELD_ID, identifier));

  for (Dataset dataset : identifierSearchers.keySet()) {

    // only search indexes whose dataset key or GBIF key equals the requested datasetID
    boolean keyMatches = dataset.getKey().toString().equals(datasetID);
    boolean gbifKeyMatches =
        dataset.getGbifKey() != null && dataset.getGbifKey().equals(datasetID);
    if (!keyMatches && !gbifKeyMatches) {
      continue;
    }

    // keep the try inside the loop: one broken index must not abort the whole lookup
    try {
      IndexSearcher identifierSearcher = identifierSearchers.get(dataset);
      TopDocs identifierDocs = identifierSearcher.search(identifierQuery, 3);

      if (identifierDocs.totalHits.value > 0) {
        // only the best-ranked document of the dataset index is reported
        Document identifierDoc =
            identifierSearcher.storedFields().document(identifierDocs.scoreDocs[0].doc);
        results.add(toExternalID(identifierDoc, dataset.getKey().toString()));
      }
    } catch (IOException e) {
      log.error("Problem querying external ID indexes with {}", identifier, e);
    }
  }
  return results;
}

/**
 * Builds an {@link ExternalID} from a document of an identifier index.
 *
 * @param doc the index document whose field values are copied
 * @param datasetKey the key of the dataset the document was found in, stored as given
 * @return a new {@link ExternalID} carrying the document's id, scientific name, rank,
 *     parent id, status and join id (exposed as {@code mainIndexID})
 */
private static ExternalID toExternalID(Document doc, String datasetKey) {
  // read every value first so the builder chain below is pure assembly
  final String externalId = doc.get(FIELD_ID);
  final String joinedMainId = doc.get(FIELD_JOIN_ID);
  final String parentId = doc.get(FIELD_PARENT_ID);
  final String name = doc.get(FIELD_SCIENTIFIC_NAME);
  final String taxonRank = doc.get(FIELD_RANK);
  final String taxonStatus = doc.get(FIELD_STATUS);

  return ExternalID.builder()
      .datasetKey(datasetKey)
      .id(externalId)
      .mainIndexID(joinedMainId)
      .parentID(parentId)
      .scientificName(name)
      .rank(taxonRank)
      .status(taxonStatus)
      .build();
}

/**
* Matches an external ID
* @param key the external ID to match
Expand Down
31 changes: 31 additions & 0 deletions matching-ws/src/main/java/life/catalogue/matching/ExternalID.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
package life.catalogue.matching;

import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
import com.fasterxml.jackson.annotation.JsonInclude;
import io.swagger.v3.oas.annotations.media.Schema;
import lombok.Builder;
import lombok.Data;

/**
 * Response object describing an identifier from another checklist and, where
 * available, the main index entry it was joined to.
 *
 * <p>All fields are plain strings. Accessors and the builder are generated by
 * Lombok ({@code @Data}, {@code @Builder}); fields that are {@code null} are left
 * out of the JSON output ({@code JsonInclude.Include.NON_NULL}) and unknown JSON
 * properties are ignored when reading ({@code ignoreUnknown = true}).
 */
@Schema(description = "An identifier from another checklist that may be associated with a name usage in the main index", title = "ExternalID", type = "object")
@Data
@JsonIgnoreProperties(ignoreUnknown = true)
@JsonInclude(JsonInclude.Include.NON_NULL)
@Builder
public class ExternalID {
// the identifier as used in the external checklist
@Schema(description = "The external identifier that may or may not have been associated with the main index")
private String id;
// identifier of the main index entry this external id was joined to
@Schema(description = "The main index identifier that the external identifier was matched to")
private String mainIndexID;
// NOTE(review): the toExternalID mapping in this change passes the key of the dataset
// whose identifier index was searched - confirm the schema description matches that
@Schema(description = "The dataset key of the main index identifier")
private String datasetKey;
// parent identifier recorded for this external id
@Schema(description = "The parent ID of the external identifier")
private String parentID;
// scientific name recorded for this external id
@Schema(description = "The scientific name associated with the external identifier")
private String scientificName;
// NOTE(review): the toExternalID mapping in this change never sets this field -
// confirm whether it is populated elsewhere
@Schema(description = "The accepted taxon ID of the external identifier")
private String acceptedTaxonID;
// rank recorded for this external id, kept as a string
@Schema(description = "The rank of the external identifier")
private String rank;
// status recorded for this external id, kept as a string
@Schema(description = "The status of the external identifier")
private String status;
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
package life.catalogue.matching;

import io.swagger.v3.oas.annotations.responses.ApiResponse;
import io.swagger.v3.oas.annotations.tags.Tag;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.PathVariable;
import org.springframework.web.bind.annotation.RestController;

/**
 * REST endpoints for looking up external identifiers, either across all
 * identifier indexes or restricted to a single dataset. Both endpoints
 * delegate to {@link MatchingService} and produce JSON.
 */
@RestController
public class IDController {

  @Autowired
  MatchingService matchingService;

  /**
   * Looks up an identifier within a single dataset.
   *
   * @param datasetId the dataset key taken from the request path
   * @param identifier the identifier taken from the request path
   * @return the result of {@code matchingService.matchID(datasetId, identifier)}
   */
  @ApiResponse(responseCode = "200", description = "External identifiers matching the lookup, empty if none were found")
  @Tag(name = "ID lookup")
  @GetMapping(
      value = {"v2/id/{datasetId}/{identifier}"},
      produces = "application/json")
  public Object matchV2(
      // path variables are mandatory segments of the mapped path, so they are never optional
      @PathVariable(value = "datasetId") String datasetId,
      @PathVariable(value = "identifier") String identifier) {
    return matchingService.matchID(datasetId, identifier);
  }

  /**
   * Looks up an identifier across all datasets.
   *
   * @param identifier the identifier taken from the request path
   * @return the result of {@code matchingService.matchID(identifier)}
   */
  @ApiResponse(responseCode = "200", description = "External identifiers matching the lookup, empty if none were found")
  @Tag(name = "ID lookup")
  @GetMapping(
      value = {"v2/id/{identifier}"},
      produces = "application/json")
  public Object matchV2(
      @PathVariable(value = "identifier") String identifier) {
    return matchingService.matchID(identifier);
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
import org.springframework.boot.web.servlet.error.ErrorController;
import org.springframework.util.Assert;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.PathVariable;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.bind.annotation.RestController;
import org.springframework.web.context.request.WebRequest;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,14 @@ private static void setAlternatives(NameUsageMatch match, List<NameUsageMatch> a
match.setAlternatives(alts);
}

/**
 * Looks up an external identifier by delegating to
 * {@code datasetIndex.lookupIdentifier(identifier)}.
 *
 * @param identifier the identifier to look up
 * @return whatever the dataset index returns for this identifier
 */
public List<ExternalID> matchID(String identifier) {
  final List<ExternalID> found = datasetIndex.lookupIdentifier(identifier);
  return found;
}

/**
 * Looks up an external identifier for one dataset by delegating to
 * {@code datasetIndex.lookupIdentifier(datasetID, identifier)}.
 *
 * @param datasetID the dataset key restricting the lookup
 * @param identifier the identifier to look up
 * @return whatever the dataset index returns for this dataset and identifier
 */
public List<ExternalID> matchID(String datasetID, String identifier) {
  final List<ExternalID> found = datasetIndex.lookupIdentifier(datasetID, identifier);
  return found;
}

public NameUsageMatch match(
@Nullable String scientificName,
@Nullable LinneanClassification classification,
Expand Down

0 comments on commit 74f6d61

Please sign in to comment.