Skip to content

Commit

Permalink
Add PDS label file paths and manifest files as additional possible in…
Browse files Browse the repository at this point in the history
…puts (#745)

* make it so the argument can be a lidvid, label, or manifest

A lidvid must start with urn:nasa:pds: to be detected as such.

A label must be a well formed PDS4 XML file.

A manifest file lists one item per line, where each item may be either a lidvid or a label.

* update help

* Improved help information

Expand help information to be more descriptive for users.

* Fix typo

* Cleanup help display

* Fix test failure

---------

Co-authored-by: Al Niessner <[email protected]>
Co-authored-by: Jordan Padams <[email protected]>
Co-authored-by: Jordan Padams <[email protected]>
  • Loading branch information
4 people authored Nov 2, 2023
1 parent 8fef282 commit f49894e
Show file tree
Hide file tree
Showing 3 changed files with 107 additions and 10 deletions.
45 changes: 35 additions & 10 deletions src/main/java/gov/nasa/pds/validate/ri/CommandLineInterface.java
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,14 @@ public class CommandLineInterface {
public CommandLineInterface() {
super();
this.opts = new Options();

// Disabling this argument for the time being since the Search API does not yet support authorized access
this.opts.addOption(Option.builder("A").argName("auth-file").desc(
"file with the URL and credential content to have full (all product states) read-only access to the registry API")
"file with the URL and credential content to have full (all product states) read-only access to the Registry Search API")
.hasArg(true).longOpt("auth-api").numberOfArgs(1).optionalArg(true).build());

this.opts.addOption(Option.builder("a").argName("auth-file").desc(
"file with the URL and credential content to have full, direct read-only access to the search DB")
"file with the URL and credential content to have full, direct read-only access to the Registry OpenSearch DB")
.hasArg(true).longOpt("auth-opensearch").numberOfArgs(1).optionalArg(true).build());
this.opts.addOption(Option.builder("h").desc("show this text and exit").hasArg(false)
.longOpt("help").optionalArg(true).build());
Expand All @@ -40,10 +43,27 @@ public CommandLineInterface() {
}

public void help() {
new HelpFormatter().printHelp("ValidateReferenceIntegrity",
"\nChecks the search DB that all references exist. If the api-auth is provided, then it will also check that the registry API also finds all the references. For lidvid, multiple values can be given using a comma like 'urn:foo::1.0,urn:bar::2.0'.\n\n",
new HelpFormatter().printHelp("validate-refs LIDVID LABEL-FILEPATH MANIFEST-FILEPATH",
"\nChecks that (1) all product references within a given product and " +
"(2) any aggregrate product references (bundles -> collections -> products) " +
"exist in the Registry OpenSearch DB or Search API. \n\n" +
"Expected positional arguments are either a LIDVID, LABEL-FILEPATH, or MANIFEST-FILEPATH.\n" +
" - A LIDVID must start with urn:.\n" +
" - A LABEL-FILEPATH must be a well formed PDS XML file.\n" +
" - A MANIFEST-FILEPATH is one item per line with an item being a lidvid or label. Each line must be terminated by a LF.\n\n" +
"Multiple arguments may be given in any order, for example:\n" +
" > validate-refs urn:nasa:pds:foo::1.0 label.xml urn:nasa:pds:bar::2.0 manifest.txt\n\n",
opts,
"\nAn auth-file is either a text file of the Java property format with two variables: 'url' and 'credentials'. The 'url' property should be the complete base URL to the Registry Search endpoint or Search API, e.g. 'https://localhost:9876/base', and 'credentials' a path to a java property file with the user name, password, and other credential information as that used by harvest. Or it is an XML text file used by harvest with <registry> containing the 'auth' attribute.\n\n",
"\nAn auth-file is either a text file of the Java property format " +
"with two variables, 'url' and 'credentials': \n\n" +
" - The 'url' property is the complete base URL to the Registry OpenSearch endpoint or Search API\n" +
" * 'https://my-registry.es.amazonaws.com/_search'\n\n" +
" - The 'credentials' is the path to:\n" +
" * Harvest config file containing the necessary Registry OpenSearch authorization\n" +
" <registry url=\"http://localhost:9200\" index=\"registry\" auth=\"/path/to/auth.cfg\" />\n" +
" * Java Properties file with a 'user' and 'password' specified, for example: \n" +
" user=janedoe\n" +
" password=mypassword\n\n",
true);
}

Expand All @@ -68,13 +88,18 @@ public int process(String[] args)
if (cl.hasOption("verbose"))
loggerConfig.setLevel(Level.INFO);
ctx.updateLoggers();

// Disabling this argument for the time being since the Search API does not yet support authorized access
this.opts.addOption(Option.builder("A").argName("auth-file").desc(
"file with the URL and credential content to have full (all product states) read-only access to the Registry Search API")
.hasArg(true).longOpt("auth-api").numberOfArgs(1).optionalArg(true).build());
if (!cl.hasOption("a"))
throw new ParseException(
"Not yet implemented. Must provide OpenSearch Registry authorization information.");
throw new ParseException("Not yet implemented. Must provide OpenSearch Registry authorization information.");

if (cl.getArgList().size() < 1)
throw new ParseException("Must provide at least one LIDVID as a starting point.");
throw new ParseException("Must provide at least one LIDVID, Label file path, or manifest file path as a starting point.");
if (!cl.hasOption("A"))
log.warn("Using OpenSearch Registry to check references.");
log.warn("Using Registry OpenSearch Database to check references.");

if (cl.hasOption("t")) {
try {
Expand All @@ -90,7 +115,7 @@ public int process(String[] args)

this.log.info("Starting the reference integrity checks.");
try {
Engine engine = new Engine(cylinders, cl.getArgList(),
Engine engine = new Engine(cylinders, UserInput.toLidvids (cl.getArgList()),
AuthInformation.buildFrom(cl.getOptionValue("auth-api", "")),
AuthInformation.buildFrom(cl.getOptionValue("auth-opensearch")));
engine.processQueueUntilEmpty();
Expand Down
70 changes: 70 additions & 0 deletions src/main/java/gov/nasa/pds/validate/ri/UserInput.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
package gov.nasa.pds.validate.ri;

import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.nio.file.Files;
import java.nio.file.InvalidPathException;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import javax.xml.transform.sax.SAXSource;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.xml.sax.InputSource;
import gov.nasa.pds.tools.util.LabelParser;
import gov.nasa.pds.tools.util.XMLExtractor;
import net.sf.saxon.om.TreeInfo;
import net.sf.saxon.tree.tiny.TinyNodeImpl;

/**
 * Turns the positional command line arguments into the flat list of identifiers handed to the
 * reference integrity engine.
 *
 * <p>Each argument is classified in this order: text starting with {@code urn:} is kept as is; an
 * existing {@code .xml}/{@code .lblx} file that parses as a label contributes the identifier read
 * from it; anything else is treated as the path of a manifest file whose lines are classified the
 * same way. Arguments that match none of these are reported with a warning and skipped.
 */
class UserInput {
  private static final Logger log = LogManager.getLogger(UserInput.class);

  /**
   * Identifier extracted by the most recent successful {@link #isLabel(String)} call.
   * NOTE(review): it is read from the {@code logical_identifier} node only, so it may be a LID
   * without the version part -- confirm this is what the engine expects.
   */
  public String labels_lidvid = "";

  /** Manifests currently being expanded; guards against a manifest that includes itself. */
  private final Set<Path> manifestsInProgress = new HashSet<Path>();

  /**
   * Expands a mixed list of LIDVIDs, label file paths, and manifest file paths.
   *
   * @param cliList the positional arguments as given by the user
   * @return the identifiers found, in the order they were encountered (duplicates are kept)
   */
  public static List<String> toLidvids (List<String> cliList){
    return new UserInput().process (cliList);
  }

  /** Tells the user that an argument could not be classified and is being skipped. */
  private void warnIgnored (String cliArg) {
    log.warn ("The argument '" + cliArg + "' does not look like a LIDVID, Label, or manifest file. Ignoring it.");
  }

  /**
   * Reads {@code cliArg} as a manifest file and classifies each of its lines.
   *
   * @param cliArg path of the candidate manifest file
   * @return the identifiers found in the manifest; empty when the path is not a readable file
   */
  private List<String> expandManifest (String cliArg) {
    List<String> lidvids = new ArrayList<String>();
    Path manifest;
    try {
      manifest = Paths.get(cliArg).toAbsolutePath().normalize();
    } catch (InvalidPathException e) {
      // Not even a syntactically valid path, so it cannot be a manifest.
      this.warnIgnored (cliArg);
      return lidvids;
    }
    if (!Files.isRegularFile(manifest)) {
      // A missing file used to be dropped silently; report it like any other bad argument.
      this.warnIgnored (cliArg);
      return lidvids;
    }
    if (!this.manifestsInProgress.add(manifest)) {
      // Without this guard a self-referencing manifest recurses until the stack overflows.
      log.warn ("The manifest '" + cliArg + "' includes itself, directly or indirectly. Ignoring the nested reference.");
      return lidvids;
    }
    try {
      lidvids.addAll (this.process (Files.readAllLines(manifest)));
    } catch (IOException e) {
      // Unreadable or not valid UTF-8 text.
      this.warnIgnored (cliArg);
    } finally {
      // Only cycles are rejected; listing the same manifest twice remains allowed.
      this.manifestsInProgress.remove(manifest);
    }
    return lidvids;
  }

  /**
   * Checks whether {@code cliArg} names an existing label file and, if so, stores the identifier
   * read from it in {@link #labels_lidvid}.
   *
   * @param cliArg candidate label file path
   * @return {@code true} only when the file has a label extension, exists, parses, and yields at
   *         least one {@code logical_identifier} node
   */
  private boolean isLabel (String cliArg) {
    if (!(cliArg.endsWith(".xml") || cliArg.endsWith (".lblx"))) {
      return false;
    }
    File file = new File(cliArg);
    if (!file.exists()) {
      return false;
    }
    // A byte stream (rather than a FileReader) lets the XML parser honor the encoding declared in
    // the document, and try-with-resources guarantees the file handle is released.
    try (InputStream in = Files.newInputStream(file.toPath())) {
      SAXSource saxSource = new SAXSource(new InputSource(in));
      TreeInfo docInfo = LabelParser.parse(saxSource); // Parses a label.
      XMLExtractor extractor = new XMLExtractor(docInfo.getRootNode());
      List<TinyNodeImpl> xmlModels = extractor.getNodesFromDoc("logical_identifier");
      if (xmlModels == null || xmlModels.isEmpty()) {
        return false;
      }
      this.labels_lidvid = xmlModels.get(0).getStringValue();
      return true;
    } catch (Exception e) {
      // Deliberately broad: any parse or extraction failure simply means "not a label", and the
      // caller then falls back to treating the argument as a manifest.
      log.debug ("Could not read '" + cliArg + "' as a label.", e);
      return false;
    }
  }

  /**
   * Classifies every entry of {@code cliList} and gathers the resulting identifiers.
   *
   * @param cliList command line arguments or the lines of a manifest file
   * @return the identifiers found, in encounter order
   */
  private List<String> process (List<String> cliList){
    List<String> lidvids = new ArrayList<String>();
    for (String rawArg : cliList) {
      // Manifest lines may carry stray whitespace or be blank; neither should produce noise.
      String cliArg = rawArg == null ? "" : rawArg.trim();
      if (cliArg.isEmpty()) {
        continue;
      }
      if (cliArg.startsWith ("urn:")) {
        lidvids.add (cliArg);
      } else if (this.isLabel (cliArg)) {
        lidvids.add (this.labels_lidvid);
      } else {
        lidvids.addAll (this.expandManifest (cliArg));
      }
    }
    return lidvids;
  }
}
2 changes: 2 additions & 0 deletions src/test/resources/riut/manifest.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
urn:nasa:pds:insight_rad::2.1
urn:nasa:pds:insight_rad::2.0

0 comments on commit f49894e

Please sign in to comment.