Skip to content

Commit

Permalink
Merge integration into main for release
Browse files Browse the repository at this point in the history
  • Loading branch information
RayPlante committed Sep 11, 2024
2 parents 162427a + 1e45804 commit 52d492d
Show file tree
Hide file tree
Showing 14 changed files with 313 additions and 54 deletions.
8 changes: 1 addition & 7 deletions docker/build-test/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,6 @@ RUN java_certs=$JAVA_HOME/jre/lib/security/cacerts; \
keytool -import -keystore $java_certs -trustcacerts -file $crt \
-storepass changeit -alias $name -noprompt; \
done;
FROM eclipse-temurin:8

RUN mkdir -p /usr/share/man/man1
RUN apt-get update && apt-get install -y netcat-openbsd zip git less \
python2 curl maven
RUN cd /usr/bin && ln -s python2 python

# Create the user that build/test operations should run as. Normally,
# this is set to match identity information of the host user that is
Expand All @@ -45,7 +39,7 @@ RUN chown $devuser:$devuser /home/$devuser/.m2/settings.xml && \
COPY entrypoint.sh /app/entrypoint.sh
RUN chmod a+rx /app/entrypoint.sh

ENV S3MOCK_JAVA_OPTS -XX:+UseContainerSupport
ENV S3MOCK_JAVA_OPTS=-XX:+UseContainerSupport
WORKDIR /app/dev
USER $devuser

Expand Down
2 changes: 1 addition & 1 deletion docker/dockbuild.sh
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ set -e

## These are set by default via _run.sh; if necessary, uncomment and customize
#
# PACKAGE_NAME=oar-build
PACKAGE_NAME=oar-dist-service
#
## list the names of the image directories (each containing a Dockerfile) for
## containers to be built. List them in dependency order (where a latter one
Expand Down
73 changes: 73 additions & 0 deletions src/main/java/gov/nist/oar/distrib/cachemgr/pdr/CacheOpts.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
/**
* This software was developed at the National Institute of Standards and Technology by employees of
* the Federal Government in the course of their official duties. Pursuant to title 17 Section 105
* of the United States Code this software is not subject to copyright protection and is in the
* public domain. This is an experimental system. NIST assumes no responsibility whatsoever for its
* use by other parties, and makes no guarantees, expressed or implied, about its quality,
* reliability, or any other characteristic. We would appreciate acknowledgement if the software is
* used. This software can be redistributed and/or modified freely provided that any derivative
* works bear some notice that they are derived from it, and any modified versions bear some notice
* that they have been modified.
*
* @author: Raymond Plante
*/
package gov.nist.oar.distrib.cachemgr.pdr;

/**
 * an object for collecting options used for caching data.
 * <p>
 * Options can be round-tripped through a compact string encoding via
 * {@link #serialize()} and {@link #parse(String)}; this encoding is used to
 * persist caching requests (e.g. in a queue file).
 */
public class CacheOpts {
    // ANDed cache-location preference flags; 0 means no preference
    public int prefs = 0;
    // a bagfile sequence tag restricting which files get restored, or null for no restriction
    public String seq = null;
    // if true, files already in the cache should be re-fetched from scratch
    public boolean recache = false;

    /** create an instance with all-default options */
    public CacheOpts() { }

    /**
     * initialize the options
     * @param recache  if true, request that files already in the cache be recached from scratch
     * @param prefs    ANDed preference flags for how/where to cache the data (0 = no preference)
     * @param seq      a bagfile sequence tag restricting the files to restore (may be null)
     */
    public CacheOpts(boolean recache, int prefs, String seq) {
        this.prefs = prefs;
        this.recache = recache;
        this.seq = seq;
    }

    /**
     * encode these options into the compact string form understood by {@link #parse(String)}.
     * The "re=" field is always emitted; "seq=" and "pr=" appear only when set.
     * @return String -- the encoded options (e.g. "re=1,seq=3,pr=5")
     */
    public String serialize() {
        // the builder always starts non-empty, so each later field is simply comma-prefixed
        StringBuilder sb = new StringBuilder("re=");
        sb.append((recache) ? "1" : "0");
        if (seq != null)
            sb.append(",seq=").append(seq);
        if (prefs != 0)
            sb.append(",pr=").append(Integer.toString(prefs));
        return sb.toString();
    }

    /**
     * parse a string encoding of options as produced by {@link #serialize()}.
     * Option names may be abbreviated to any non-empty prefix (e.g. "re", "seq", "pr").
     * The legacy encodings "0" and "1" are accepted as recache=false/true, respectively.
     * Unrecognized or malformed entries are silently ignored (best-effort parsing).
     * @param optstr   the encoded options; null is treated as all-defaults
     * @return CacheOpts -- a new instance reflecting the encoded settings
     */
    public static CacheOpts parse(String optstr) {
        CacheOpts out = new CacheOpts();
        if (optstr == null || optstr.equals("0")) // legacy syntax
            return out;
        if (optstr.equals("1")) {                 // legacy syntax
            out.recache = true;
            return out;
        }

        String[] opts = optstr.split(",\\s*");
        for (String opt : opts) {
            String[] kv = opt.split("=", 2);
            // an empty key would prefix-match every option name below; skip it explicitly
            if (kv.length < 2 || kv[0].length() == 0)
                continue;
            kv[0] = kv[0].toLowerCase();
            if ("recache".startsWith(kv[0]))
                out.recache = kv[1].equals("1");
            else if ("prefs".startsWith(kv[0])) {
                try { out.prefs = Integer.parseInt(kv[1]); }
                catch (NumberFormatException ex) { /* best-effort: ignore bad value */ }
            }
            else if ("sequence".startsWith(kv[0]))
                out.seq = kv[1];
        }

        return out;
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -215,13 +215,30 @@ public void optimallyCache(String id, int prefs) throws CacheManagementException
/**
 * queue up a dataset or file object to be cached asynchronously via a separate thread
 * @param id the full aipid for the dataset or object (of the form DSID[/FILEPATH][#VERSION])
 * @param recache if true, request that files already in the cache be recached from scratch;
 * if false, such files will not be updated.
 * @throws ResourceNotFoundException if no AIP with the given id exists
 * @throws StorageVolumeException if there is a problem accessing the underlying storage
 * @throws CacheManagementException if the request cannot be added to the queue
 */
public void queueCache(String id, boolean recache)
    throws ResourceNotFoundException, StorageVolumeException, CacheManagementException
{
    // delegate to the three-argument form with no sequence restriction
    queueCache(id, recache, null);
}

/**
 * queue up a dataset or file object to be cached asynchronously via a separate thread
 * @param id the full aipid for the dataset or object (of the form DSID[/FILEPATH][#VERSION])
 * @param recache if true, request that files already in the cache be recached from scratch;
 * if false, such files will not be updated.
 * @param sequence a string for restricting the files from the dataset that get restored. The
 * value is matched against the bagfile sequence number; only those files contained
 * in that bag will be restored. If null, no restriction is applied.
 * @throws ResourceNotFoundException if no AIP with the given id exists
 * @throws StorageVolumeException if there is a problem accessing the underlying storage
 * @throws CacheManagementException if the request cannot be added to the queue
 */
public void queueCache(String id, boolean recache, String sequence)
    throws ResourceNotFoundException, StorageVolumeException, CacheManagementException
{
    // fail fast if the identifier does not resolve to a known AIP
    if (restorer.doesNotExist(id))
        throw new ResourceNotFoundException(id);
    // NOTE(review): the merged diff left the superseded call "cath.queue(id, recache);"
    // alongside its replacement; only the three-argument call belongs here.
    cath.queue(id, recache, sequence);
    // the caching thread may have exited after draining its queue; (re)start it if needed
    if (! cath.isAlive())
        cath.start();
}
Expand Down Expand Up @@ -628,6 +645,23 @@ public List<CacheObject> check(String aipid, boolean recache)
return deleted;
}

/**
 * cache all of the files from the given dataset
 * @param dsid the AIP identifier for the dataset; this is either the old-style EDI-ID or
 * local portion of the PDR ARK identifier (e.g., <code>"mds2-2119"</code>).
 * @param version the desired version of the dataset or null for the latest version
 * @param opts options for how the data should be cached, including whether files already
 * in the cache should be recached, ANDed location preferences, and an optional
 * bagfile sequence restriction (see {@link CacheOpts})
 * @param target a prefix collection name to insert the data files into within the cache
 * @return Set&lt;String&gt; -- a list of the filepaths for files that were cached
 * @throws StorageVolumeException if there is a problem accessing the underlying storage
 * @throws ResourceNotFoundException if no dataset with the given dsid exists
 * @throws CacheManagementException if a problem occurs while placing files into the cache
 */
public Set<String> cacheDataset(String dsid, String version, CacheOpts opts, String target)
    throws StorageVolumeException, ResourceNotFoundException, CacheManagementException
{
    // NOTE(review): assumes restorer is always a PDRDatasetRestorer -- confirm the cast is safe
    return ((PDRDatasetRestorer) restorer).cacheDataset(dsid, version, theCache, opts, target);
}

/**
* a thread that will cache data in order of their IDs in an internal queue. Objects can be added
* to the queue via {@link #queue(String,boolean,String)}.
Expand Down Expand Up @@ -685,8 +719,13 @@ synchronized Queue<String> loadQueue() throws IOException {
return _queue;
}

public synchronized void queue(String aipid, boolean recache) throws CacheManagementException {
aipid += "\t"+((recache) ? "1" : "0");
// convenience form: queue the identifier with no bagfile sequence restriction
public void queue(String aipid, boolean recache) throws CacheManagementException {
    queue(aipid, recache, null);
}

public synchronized void queue(String aipid, boolean recache, String seq) throws CacheManagementException {
CacheOpts opts = new CacheOpts(recache, 0, seq);
aipid += "\t"+opts.serialize();
try {
BufferedWriter out = new BufferedWriter(new FileWriter(_queuef, true));
try {
Expand All @@ -712,7 +751,7 @@ public boolean hasPending() {
public boolean isQueued(String aipid) {
try {
Queue<String> _queue = loadQueue();
return _queue.contains(aipid+"\t0") || _queue.contains(aipid+"\t1");
return ! _queue.stream().noneMatch(e -> e.startsWith(aipid+"\t"));
} catch (IOException ex) {
log.error("isQueued: status of "+aipid+" unknown; "+
"Can't access queue's persistent cache: "+ ex.getMessage());
Expand Down Expand Up @@ -740,21 +779,23 @@ public String cacheNext() throws CacheManagementException {
}

protected String cacheQueueItem(String qitem) throws CacheManagementException {
boolean recache = true;
String[] parts = qitem.split("\\s*\\t\\s*");
String nextid = parts[0];
inprocess = nextid;
if (parts.length > 1 && "0".equals(parts[1]))
recache = false;

CacheOpts opts = new CacheOpts();
if (parts.length > 1)
opts = CacheOpts.parse(parts[1]);

String version = null;
if (parts.length > 2) version = parts[2];

parts = ((PDRDatasetRestorer) restorer).parseId(nextid);
try {
if (parts[1].length() == 0)
// dataset identifier
cacheDataset(parts[0], version, recache, 0, null);
else if (recache || ! isCached(nextid))
cacheDataset(parts[0], version, opts, null);
else if (opts.recache || ! isCached(nextid))
// data file identifier
cache(nextid, true);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
import java.util.Map;
import java.util.HashSet;
import java.util.HashMap;
import java.util.Iterator;
import java.util.zip.ZipInputStream;
import java.util.zip.ZipEntry;
import java.nio.file.Path;
Expand Down Expand Up @@ -384,16 +385,31 @@ public Set<String> cacheDataset(String aipid, String version, Cache into, boolea
int prefs, String target)
throws StorageVolumeException, ResourceNotFoundException, CacheManagementException
{
return cacheDatasetFromStore(aipid, version, into, recache, prefs, target, ltstore);
CacheOpts opts = new CacheOpts(recache, prefs, null);
return cacheDatasetFromStore(aipid, version, into, opts, target, ltstore);
}

/**
 * cache all data that is part of the given version of the archive information package (AIP).
 * @param aipid the identifier for the AIP.
 * @param version the version of the AIP to cache. If null, the latest is cached.
 * @param into the Cache to save the files to
 * @param opts options for caching the data (including "recache", whether to force a recache)
 * @param target a prefix collection name to insert the data files into within the cache
 * @return Set&lt;String&gt; -- a list of the filepaths for files that were cached
 * @throws StorageVolumeException if there is a problem accessing the long-term bag storage
 * @throws ResourceNotFoundException if no AIP with the given aipid exists
 * @throws CacheManagementException if a problem occurs while placing files into the cache
 */
public Set<String> cacheDataset(String aipid, String version, Cache into, CacheOpts opts, String target)
    throws StorageVolumeException, ResourceNotFoundException, CacheManagementException
{
    return cacheDatasetFromStore(aipid, version, into, opts, target, ltstore);
}

/**
* cache all data that is part of the latest version of the archive information package (AIP)
* from a particular store. Called by {@link cacheDataset}, this is provided to allow alternate
* implementations by subclasses.
*/
protected Set<String> cacheDatasetFromStore(String aipid, String version, Cache into, boolean recache,
int prefs, String target, BagStorage store)
protected Set<String> cacheDatasetFromStore(String aipid, String version, Cache into, CacheOpts opts,
String target, BagStorage store)
throws StorageVolumeException, ResourceNotFoundException, CacheManagementException
{
// find the head bag in the bag store
Expand All @@ -402,8 +418,8 @@ protected Set<String> cacheDatasetFromStore(String aipid, String version, Cache
throw new CacheManagementException("Unsupported serialization type on bag: " + headbag);
String bagname = headbag.substring(0, headbag.length()-4);
String mbagver = BagUtils.multibagVersionOf(bagname);
if (prefs == 0) {
prefs = getDefaultPrefs(version != null);
if (opts.prefs == 0) {
opts.prefs = getDefaultPrefs(version != null);
}
// pull out the NERDm resource metadata record
JSONObject resmd = null;
Expand Down Expand Up @@ -442,16 +458,43 @@ protected Set<String> cacheDatasetFromStore(String aipid, String version, Cache
revlu.get(pair.getValue()).add(pair.getKey().replaceFirst("^data/", ""));
}

HashMap<String, Set<String>> uselu = revlu;
if (opts.seq != null) {
uselu = new HashMap<String, Set<String>>();
for (String bagf : revlu.keySet()) {
if (bagf.endsWith("-"+opts.seq))
uselu.put(bagf, revlu.get(bagf));
}
if (uselu.size() == 0)
log.warn("Sequence tag does not match any bag files for requested version");
}

// loop through the member bags and extract the data files
Set<String> cached = new HashSet<String>(lu.size());
Set<String> missing = new HashSet<String>();
for (String bagfile : revlu.keySet()) {
Set<String> need = new HashSet<String>(revlu.get(bagfile));
for (String bagfile : uselu.keySet()) {
Set<String> need = new HashSet<String>(uselu.get(bagfile));
if (! opts.recache) {
// check to see if we really need to open up this bag
Iterator<String> it = need.iterator();
while (it.hasNext()) {
String fp = it.next();
if (into.isCached(idForObject(aipid, fp, version, target))) {
it.remove();
cached.add(fp);
}
}
if (need.size() == 0) {
log.info("Nothing needed from bag, {} (skipping)", bagfile);
continue;
}
}

if (! bagfile.endsWith(".zip"))
bagfile += ".zip";
log.info("Caching files from bag, "+bagfile);
try {
cacheFromBag(bagfile, need, cached, resmd, prefs, version, into, recache, target);
cacheFromBag(bagfile, need, cached, resmd, opts.prefs, version, into, opts.recache, target);
}
catch (FileNotFoundException ex) {
log.error("Member bag not found in store (skipping): "+bagfile);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -253,13 +253,12 @@ public Set<String> cacheDataset(String aipid, String version, Cache into, boolea
throws StorageVolumeException, ResourceNotFoundException, CacheManagementException {

Set<String> cachedFiles;
CacheOpts opts = new CacheOpts(recache, prefs, null);

try {
cachedFiles = cacheDatasetFromStore(aipid, version, into, recache, prefs, target,
restrictedLtstore);
cachedFiles = cacheDatasetFromStore(aipid, version, into, opts, target, restrictedLtstore);
} catch (ResourceNotFoundException ex) {
cachedFiles = cacheDatasetFromStore(aipid, version, into, recache, prefs, target,
ltstore);
cachedFiles = cacheDatasetFromStore(aipid, version, into, opts, target, ltstore);
}

return cachedFiles;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -177,9 +177,6 @@ public HttpURLConnectionRPARequestHandlerService(RPAConfiguration rpaConfigurati

// Set HttpClient
this.httpClient = HttpClients.createDefault();

// Log RPA configuration coming from the config server
LOGGER.debug("RPA_CONFIGURATION=" + this.rpaConfiguration.toString());
}

public RPAConfiguration getConfig() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
import gov.nist.oar.distrib.cachemgr.CacheObject;
import gov.nist.oar.distrib.cachemgr.InventoryException;
import gov.nist.oar.distrib.cachemgr.VolumeNotFoundException;
import gov.nist.oar.distrib.cachemgr.pdr.CacheOpts;
import gov.nist.oar.distrib.DistributionException;
import gov.nist.oar.distrib.ResourceNotFoundException;
import gov.nist.oar.distrib.StorageVolumeException;
Expand Down Expand Up @@ -318,12 +319,13 @@ public ResponseEntity<String> updateDataFile(@PathVariable("dsid") String dsid,
if (version != null)
id += "#"+version;
String recachep = request.getParameter("recache");
boolean recache = ! ("0".equals(recachep) || "false".equals(recachep));
boolean recache = ! (recachep == null || "0".equals(recachep) || "false".equals(recachep));
String seq = request.getParameter("seq");

if (":cached".equals(selector)) {
try {
mgr.queueCache(id, recache);
log.info("Queued for caching: {}", id);
mgr.queueCache(id, recache, seq);
log.info("Queued for caching: {} {}", id, ((seq==null) ? "" : "seq="+seq));
return new ResponseEntity<String>("Cache target queued", HttpStatus.ACCEPTED);
} catch (ResourceNotFoundException ex) {
return new ResponseEntity<String>("Resource not found", HttpStatus.NOT_FOUND);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ public void getBundleDownloadPlanTest()
{
FileRequest[] inputfileList = new FileRequest[2];
String val1 = "{\"filePath\":\"/1894/license.pdf\",\"downloadUrl\":\"https://s3.amazonaws.com/nist-midas/1894/license.pdf\"}";
String val2 = "{\"filePath\":\"/1894/license2.pdf\",\"downloadUrl\":\"https://project-open-data.cio.gov/v1.1/schema/\"}";
String val2 = "{\"filePath\":\"/1894/license2.pdf\",\"downloadUrl\":\"https://wikipedia.net/wiki/URL_redirection\"}";

ObjectMapper mapper = new ObjectMapper();
FileRequest testval1 = mapper.readValue(val1, FileRequest.class);
Expand All @@ -56,12 +56,12 @@ public void getBundleDownloadPlanTest()
inputfileList[1] = testval2;
BundleRequest bFL = new BundleRequest("testdownload", inputfileList, 0,2);
DownloadBundlePlanner dpl = new DownloadBundlePlanner(bFL, 200000, 3,
"s3.amazonaws.com|project-open-data.cio.gov", "testdownload", 1);
"s3.amazonaws.com|wikipedia.net", "testdownload", 5);
BundleDownloadPlan bundlePlan = dpl.getBundleDownloadPlan();
assertEquals(bundlePlan.getPostEachTo(), "_bundle");
assertEquals(bundlePlan.getStatus(), "complete");
assertEquals(bundlePlan.getBundleNameFilePathUrl().length, 1);
assertEquals(bundlePlan.getBundleNameFilePathUrl()[0].getIncludeFiles().length, 2);
assertEquals(bundlePlan.getBundleNameFilePathUrl().length, 2);
assertEquals(bundlePlan.getBundleNameFilePathUrl()[0].getIncludeFiles().length, 1);

}

Expand Down
Loading

0 comments on commit 52d492d

Please sign in to comment.