Skip to content

Commit

Permalink
Merge pull request #751 from NASA-PDS/issue_748
Browse files Browse the repository at this point in the history
Improvements for reference integrity checking for large collection inventories (larger than roughly 100 MB)
  • Loading branch information
jordanpadams authored Nov 15, 2023
2 parents 0e96ffe + 7691765 commit f2583d8
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
import org.apache.logging.log4j.Logger;
import org.opensearch.action.search.SearchRequest;
import org.opensearch.action.search.SearchResponse;
import org.opensearch.client.HttpAsyncResponseConsumerFactory;
import org.opensearch.client.RequestOptions;
import org.opensearch.client.RestClient;
import org.opensearch.client.RestClientBuilder;
Expand All @@ -43,6 +44,7 @@ public class OpensearchDocument implements DocumentInfo, RestClientBuilder.HttpC
final private HashMap<String, List<String>> references = new HashMap<String, List<String>>();
final private LidvidComparator lidvid_compare = new LidvidComparator();
final private Logger log = LogManager.getLogger(OpensearchDocument.class);
final private RequestOptions.Builder hugeMemory = RequestOptions.DEFAULT.toBuilder();

private void load(String lidvid) {
if (!this.documents.containsKey(lidvid)) {
Expand Down Expand Up @@ -149,7 +151,7 @@ protected SearchResponse search(RestHighLevelClient client, SearchRequest reques
try {
if (OpensearchDocument.sourceOverride != null)
return OpensearchDocument.sourceOverride.search(client, request);
return client.search(request, RequestOptions.DEFAULT);
return client.search(request, this.hugeMemory.build());
} catch (ConnectException ce) {
iteration++;
if (iteration < 5)
Expand All @@ -162,6 +164,8 @@ protected SearchResponse search(RestHighLevelClient client, SearchRequest reques

public OpensearchDocument(AuthInformation context) {
this.context = context;
this.hugeMemory.setHttpAsyncResponseConsumerFactory(
new HttpAsyncResponseConsumerFactory.HeapBufferedResponseConsumerFactory(Integer.MAX_VALUE)); // almost 2 GB
}

@Override
Expand Down
9 changes: 7 additions & 2 deletions src/main/java/gov/nasa/pds/validate/ri/UserInput.java
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import java.util.stream.Collectors;
import java.util.stream.Stream;
import javax.xml.transform.sax.SAXSource;
import org.apache.commons.lang.StringUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.xml.sax.InputSource;
Expand Down Expand Up @@ -44,10 +45,11 @@ private boolean isLabel (String cliArg) {
TreeInfo docInfo = LabelParser.parse(saxSource); // Parses a label.
List<TinyNodeImpl> xmlModels = new ArrayList<>();
XMLExtractor extractor = new XMLExtractor(docInfo.getRootNode());
xmlModels = extractor.getNodesFromDoc("logical_identifier");
xmlModels = extractor.getNodesFromDoc("//logical_identifier");
this.labels_lidvid = xmlModels.get(0).getStringValue();
return true;
} catch (Exception e) {
e.printStackTrace();
return false;
}
}
Expand All @@ -57,7 +59,10 @@ private boolean isLabel (String cliArg) {
private List<String> process (List<String> cliList){
List<String> lidvids = new ArrayList<String>();
for (String cliArg : cliList) {
if (cliArg.startsWith ("urn:")) {
cliArg = StringUtils.substringBefore(cliArg, "#");
if (cliArg.length() == 0) {
// ignore empty lines
} else if (cliArg.startsWith ("urn:")) {
lidvids.add (cliArg);
} else if (this.isLabel (cliArg)) {
lidvids.add (this.labels_lidvid);
Expand Down

0 comments on commit f2583d8

Please sign in to comment.