Skip to content

Commit

Permalink
performance improvement
Browse files Browse the repository at this point in the history
Step one: removed the tallest tent pole by fixing the ValidationTarget cache. Not everything was being cached. In fact, little to none of it was. Implementing this sped up the final step that counts on this by 10x. The 11000 strong bundle went from 73 seconds to 7.3 seconds. The overall time remains fixed at 1500 seconds. Not sure if the 73 seconds were simply redistributed or if that is just normal variation of an uncertain process (depends on network download times).
  • Loading branch information
Al Niessner authored and Al Niessner committed Aug 23, 2024
1 parent 0d6f7ce commit 49dd150
Show file tree
Hide file tree
Showing 11 changed files with 58 additions and 110 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -949,7 +949,7 @@ public static void additionalReferentialIntegrityChecks(URL crawlTarget, URL bun
*/
protected static void reportError(ProblemDefinition defn, URL targetUrl, int lineNumber,
int columnNumber) {
ValidationProblem problem = new ValidationProblem(defn, new ValidationTarget(targetUrl),
ValidationProblem problem = new ValidationProblem(defn, ValidationTarget.build(targetUrl),
lineNumber, columnNumber, defn.getMessage());
problemListener.addProblem(problem);
}
Expand All @@ -965,7 +965,7 @@ protected static void reportError(ProblemDefinition defn, URL targetUrl, int lin
*/
protected static void reportError(ProblemDefinition defn, URL target, int lineNumber,
int columnNumber, String message) {
ValidationProblem problem = new ValidationProblem(defn, new ValidationTarget(target),
ValidationProblem problem = new ValidationProblem(defn, ValidationTarget.build(target),
lineNumber, columnNumber, message);
problemListener.addProblem(problem);
}
Expand Down
50 changes: 2 additions & 48 deletions src/main/java/gov/nasa/pds/tools/util/Utility.java
Original file line number Diff line number Diff line change
Expand Up @@ -23,19 +23,16 @@
import java.net.URL;
import java.net.URLConnection;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import javax.net.ssl.HttpsURLConnection;
import javax.net.ssl.SSLContext;
import javax.net.ssl.SSLSocketFactory;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.FilenameUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.xml.sax.InputSource;
import gov.nasa.pds.tools.validate.TargetExaminer;
import gov.nasa.pds.tools.validate.TargetType;
import gov.nasa.pds.tools.validate.ValidationTarget;

/**
* Utility class.
Expand All @@ -46,15 +43,6 @@
public class Utility {
private static final Logger LOG = LoggerFactory.getLogger(Utility.class);

// A static cache of the ValidationTargets.
// There is no need to re-evaluate and/or create these
// as validation proceeds, as they are static things like
// a file or a URL.
public static HashMap<String, ValidationTarget> cachedTargets;
static {
cachedTargets = new HashMap<>();
}

// Implementation is needed since pds.nasa.gov currently uses SNI
// which is not supported in Java 6, but is supported in Java 7.
static {
Expand All @@ -70,40 +58,6 @@ public boolean verify(String hostname, javax.net.ssl.SSLSession sslSession) {
});
}

/**
* Returns a ValidationTarget for the specified target URL.
*
* If a cached target already exists in the cache, then that is returned, otherwise a new
* ValidationTarget is returned.
*
*/
public static ValidationTarget getValidationTarget(URL target) {
if (target == null) {
// for backwards-compatability with previous code, supporting the null case.
// This seems to be null in the additional context products case.
return new ValidationTarget(null);
}
ValidationTarget valTarget = cachedTargets.get(target.toString());
if (valTarget == null) {
valTarget = new ValidationTarget(target);
cachedTargets.put(target.toString(), valTarget);
}
return valTarget;
}
public static ValidationTarget getValidationTarget(URL source, URL label) {
if (source == null) {
// for backwards-compatability with previous code, supporting the null case.
// This seems to be null in the additional context products case.
return new ValidationTarget(null);
}
ValidationTarget valTarget = cachedTargets.get(source.toString());
if (valTarget == null) {
valTarget = new ValidationTarget(source, label);
cachedTargets.put(source.toString(), valTarget);
}
return valTarget;
}

/**
* Method that opens a connection. Supports redirects.
*
Expand All @@ -126,8 +80,8 @@ public static InputStream openConnection(URLConnection conn) throws IOException
SSLContext context = SSLContext.getInstance("TLSv1.2");
context.init(null, null, new java.security.SecureRandom());
HttpsURLConnection test = (HttpsURLConnection) conn;
SSLSocketFactory sf = test.getSSLSocketFactory();
SSLSocketFactory d = HttpsURLConnection.getDefaultSSLSocketFactory();
test.getSSLSocketFactory();
HttpsURLConnection.getDefaultSSLSocketFactory();
((HttpsURLConnection) conn).setSSLSocketFactory(context.getSocketFactory());
} catch (Exception e) {
throw new IOException(e.getMessage());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ public ValidationTarget getRoot() {
public synchronized void addTarget(String parentLocation, TargetType type, String location) {
ValidationTarget target;
try {
target = new ValidationTarget(location, type);
target = ValidationTarget.build(location, type);
if (parentLocation == null) {
this.rootTarget = target;
}
Expand Down
11 changes: 5 additions & 6 deletions src/main/java/gov/nasa/pds/tools/validate/ValidationProblem.java
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@

import java.net.URL;
import java.util.Collection;
import gov.nasa.pds.tools.util.Utility;
import gov.nasa.pds.tools.validate.rule.GenericProblems;

public class ValidationProblem {
Expand All @@ -30,7 +29,7 @@ public class ValidationProblem {

public ValidationProblem(ProblemDefinition defn, URL target, int lineNumber, int columnNumber,
String message) {
this(defn, Utility.getValidationTarget(target), lineNumber, columnNumber, message);
this(defn, ValidationTarget.build(target), lineNumber, columnNumber, message);
}

public ValidationProblem(ProblemDefinition defn, ValidationTarget target, int lineNumber,
Expand All @@ -49,7 +48,7 @@ public ValidationProblem(ProblemDefinition defn, ValidationTarget target, int li
}

public ValidationProblem(ProblemDefinition defn, URL target, int lineNumber, int columnNumber) {
this(defn, Utility.getValidationTarget(target), lineNumber, columnNumber);
this(defn, ValidationTarget.build(target), lineNumber, columnNumber);
}

public ValidationProblem(ProblemDefinition defn, ValidationTarget target, int lineNumber,
Expand All @@ -58,18 +57,18 @@ public ValidationProblem(ProblemDefinition defn, ValidationTarget target, int li
}

public ValidationProblem(ProblemDefinition defn, URL target, String message) {
this(defn, Utility.getValidationTarget(target), message);
this(defn, ValidationTarget.build(target), message);
}

public ValidationProblem(ProblemDefinition defn, ValidationTarget target, String message) {
this(defn, target, -1, -1, message);
}

public ValidationProblem(ProblemDefinition defn, URL target) {
this(defn, Utility.getValidationTarget(target));
this(defn, ValidationTarget.build(target));
}
public ValidationProblem(ProblemDefinition defn, URL source, URL label) {
this(defn, Utility.getValidationTarget(source, label));
this(defn, ValidationTarget.build(source, label));
}

public ValidationProblem(ProblemDefinition defn, ValidationTarget target) {
Expand Down
77 changes: 36 additions & 41 deletions src/main/java/gov/nasa/pds/tools/validate/ValidationTarget.java
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,19 @@

import java.net.MalformedURLException;
import java.net.URL;
import java.util.HashMap;
import org.apache.commons.io.FilenameUtils;
import gov.nasa.pds.tools.util.Utility;

/**
* Represents a location within a validation subtree that can have errors reported against it.
*/
public class ValidationTarget implements Comparable<ValidationTarget> {
// A static cache of the ValidationTargets.
// There is no need to re-evaluate and/or create these
// as validation proceeds, as they are static things like
// a file or a URL.
public static HashMap<String, ValidationTarget> cachedTargets = new HashMap<>();

private TargetType type;
private String name;
Expand All @@ -32,17 +38,36 @@ public class ValidationTarget implements Comparable<ValidationTarget> {

private int knownHashCode;

/**
* Creates a new instance.
*
* @param target the target file or directory
*/
public ValidationTarget(URL target) {
this(target, null);
private static ValidationTarget build (URL target, URL source, TargetType type) {
String key = target == null ? "" : target.toString();
if (!cachedTargets.containsKey(key)) {
cachedTargets.put (key,new ValidationTarget(target, source, type));
}
return cachedTargets.get(key);
}
public static ValidationTarget build (URL target) {return build (target, null, null);}
public static ValidationTarget build (URL target, URL label) {return build (target, label, null);}
public static ValidationTarget build (String targetLocation, TargetType type) throws MalformedURLException
{
int slashPos = targetLocation.lastIndexOf('/');
ValidationTarget result = build(new URL(targetLocation), null, type);
result.setLocation(targetLocation);
if (slashPos < 0) {
slashPos = targetLocation.lastIndexOf('\\');
}

if (slashPos < 0) {
result.setName(targetLocation);
} else {
result.setName(targetLocation.substring(slashPos + 1));
}
return result;
}
public ValidationTarget(URL target, URL label) {

private ValidationTarget(URL target, URL label, TargetType type) {
this.url = target;
if (target != null) {
this.type = type;
if (target != null && type == null) {
if (label == null) {
type = Utility.getTargetType(target);
} else {
Expand All @@ -57,35 +82,11 @@ public ValidationTarget(URL target, URL label) {
name = FilenameUtils.getName(target.toString());
}
} else {
type = TargetType.FILE;
location = null;
name = null;
}
}

/**
* Creates an instance with a given location.
*
* @param location the location
* @throws MalformedURLException
*/
public ValidationTarget(String location, TargetType type) throws MalformedURLException {
this.location = location;
this.type = type;
this.url = new URL(location);

int slashPos = location.lastIndexOf('/');
if (slashPos < 0) {
slashPos = location.lastIndexOf('\\');
}

if (slashPos < 0) {
name = location;
} else {
name = location.substring(slashPos + 1);
}
}

/**
* Gets the target type.
*
Expand Down Expand Up @@ -118,7 +119,7 @@ public String getLocation() {
*
* @param location the new location
*/
public void setLocation(String location) {
protected void setLocation(String location) {
this.location = location;
}

Expand Down Expand Up @@ -189,8 +190,7 @@ public String toString() {
@Override
public int hashCode() {
if (knownHashCode == 0) {
String combined = location + ":" + (type == null ? "" : type.toString());
knownHashCode = combined.hashCode();
knownHashCode = location.hashCode();
}

return knownHashCode;
Expand All @@ -209,9 +209,4 @@ public boolean equals(Object obj) {
public URL getUrl() {
return url;
}

public void setUrl(URL url) {
this.url = url;
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ public static void uniqueBundleRefs (ProblemListener listener, URL aggregate) {
InventoryTableValidator.checkAllUnique(ids, listener, aggregate);
} catch (XPathException | XPathExpressionException e) {
listener.addProblem(new ValidationProblem(GenericProblems.UNCAUGHT_EXCEPTION,
new ValidationTarget(aggregate),-1, -1, e.getMessage()));
ValidationTarget.build(aggregate),-1, -1, e.getMessage()));
}
}
public static void uniqueCollectionRefs (ProblemListener listener, URL aggregate) {
Expand All @@ -46,7 +46,7 @@ public static void uniqueCollectionRefs (ProblemListener listener, URL aggregate
InventoryTableValidator.checkAllUnique(ids, listener, aggregate);
} catch (InventoryReaderException e) {
listener.addProblem(new ValidationProblem(GenericProblems.UNCAUGHT_EXCEPTION,
new ValidationTarget(aggregate),-1, -1, e.getMessage()));
ValidationTarget.build(aggregate),-1, -1, e.getMessage()));
}
}
private static void checkAllUnique(List<String> ids, ProblemListener listener, URL aggregate) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@ protected TargetRegistrar getRegistrar() {
*/
protected void reportError(ProblemDefinition defn, URL targetUrl, int lineNumber,
int columnNumber) {
ValidationProblem problem = new ValidationProblem(defn, new ValidationTarget(targetUrl),
ValidationProblem problem = new ValidationProblem(defn, ValidationTarget.build(targetUrl),
lineNumber, columnNumber, defn.getMessage());
listener.addProblem(problem);
}
Expand All @@ -184,7 +184,7 @@ protected void reportError(ProblemDefinition defn, URL targetUrl, int lineNumber
*/
protected void reportError(ProblemDefinition defn, URL target, int lineNumber, int columnNumber,
String message) {
ValidationProblem problem = new ValidationProblem(defn, new ValidationTarget(target),
ValidationProblem problem = new ValidationProblem(defn, ValidationTarget.build(target),
lineNumber, columnNumber, message);
listener.addProblem(problem);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,7 @@ public void checkContextReferences() throws XPathExpressionException {
} catch (URISyntaxException e) {
// Should never happen
}
ValidationTarget target = new ValidationTarget(getTarget());
ValidationTarget target = ValidationTarget.build(getTarget());
Document label = getContext().getContextValue(PDS4Context.LABEL_DOCUMENT, Document.class);
DOMSource source = new DOMSource(label);
source.setSystemId(uri.toString());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ public FileAndDirectoryNamingChecker() {}

private ValidationProblem constructError(ProblemDefinition defn, URL targetUrl, int lineNumber,
int columnNumber) {
ValidationProblem problem = new ValidationProblem(defn, new ValidationTarget(targetUrl),
ValidationProblem problem = new ValidationProblem(defn, ValidationTarget.build(targetUrl),
lineNumber, columnNumber, defn.getMessage());
return (problem);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -124,10 +124,10 @@ public void validateFileReferences() {
ProblemDefinition pd =
new ProblemDefinition(ExceptionType.ERROR, ProblemType.INTERNAL_ERROR, te.getMessage());
if (te.getLocator() != null) {
getListener().addProblem(new ValidationProblem(pd, new ValidationTarget(getTarget()),
getListener().addProblem(new ValidationProblem(pd, ValidationTarget.build(getTarget()),
te.getLocator().getLineNumber(), te.getLocator().getColumnNumber(), te.getMessage()));
} else {
getListener().addProblem(new ValidationProblem(pd, new ValidationTarget(getTarget())));
getListener().addProblem(new ValidationProblem(pd, ValidationTarget.build(getTarget())));
}
}
LOG.debug("validateFileReferences:leaving:uri {}", uri);
Expand Down Expand Up @@ -160,7 +160,7 @@ private void checkExtension(String filename, String encoding) {
}

private boolean validate(NodeInfo xml) {
this.target = new ValidationTarget(getTarget());
this.target = ValidationTarget.build(getTarget());

try {
// Perform checksum validation on the label itself
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ public boolean isApplicable(String location) {
@ValidationTest
public void validateLocalIdentifiers() {
List<ValidationProblem> problems = new ArrayList<>();
ValidationTarget target = new ValidationTarget(getTarget());
ValidationTarget target = ValidationTarget.build(getTarget());
Document label = getContext().getContextValue(PDS4Context.LABEL_DOCUMENT, Document.class);
DOMSource source = new DOMSource(label);

Expand All @@ -80,7 +80,7 @@ public void validateLocalIdentifiers() {
ProblemDefinition pd = new ProblemDefinition(ExceptionType.ERROR, ProblemType.INTERNAL_ERROR,
"Error while finding local_identifier_reference attributes using XPath '"
+ LOCAL_IDENTIFIER_REF_PATH + "': " + xe.getMessage());
getListener().addProblem(new ValidationProblem(pd, new ValidationTarget(getTarget())));
getListener().addProblem(new ValidationProblem(pd, ValidationTarget.build(getTarget())));
return;
}
NodeList localIds = null;
Expand All @@ -91,7 +91,7 @@ public void validateLocalIdentifiers() {
ProblemDefinition pd = new ProblemDefinition(ExceptionType.ERROR, ProblemType.INTERNAL_ERROR,
"Error while finding local_identifier attributes using XPath '" + LOCAL_IDENTIFIER_PATH
+ "': " + xe.getMessage());
getListener().addProblem(new ValidationProblem(pd, new ValidationTarget(getTarget())));
getListener().addProblem(new ValidationProblem(pd, ValidationTarget.build(getTarget())));
return;
}

Expand Down

0 comments on commit 49dd150

Please sign in to comment.