diff --git a/client-api/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/data/query/Query.java b/client-api/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/data/query/Query.java index 8e28e74e..56b961b0 100644 --- a/client-api/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/data/query/Query.java +++ b/client-api/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/data/query/Query.java @@ -30,6 +30,7 @@ public Query(Query query) { }); } this.id = query.id; + this.picSureId = query.picSureId; } private ResultType expectedResultType = ResultType.COUNT; @@ -43,6 +44,7 @@ public Query(Query query) { private List variantInfoFilters = new ArrayList<>(); private String id; + private String picSureId; public ResultType getExpectedResultType() { return expectedResultType; @@ -127,6 +129,14 @@ public void setId(String id) { this.id = id; } + public String getPicSureId() { + return picSureId; + } + + public void setPicSureId(String picSureId) { + this.picSureId = picSureId; + } + public static class VariantInfoFilter { public VariantInfoFilter() { diff --git a/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/AbstractProcessor.java b/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/AbstractProcessor.java index 0a196f5b..6323a400 100644 --- a/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/AbstractProcessor.java +++ b/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/AbstractProcessor.java @@ -29,6 +29,9 @@ import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Component; +import javax.annotation.Nullable; +import javax.validation.constraints.NotNull; + @Component public class AbstractProcessor { @@ -187,6 +190,16 @@ protected Set applyBooleanLogic(List> filteredIdSets) { return ids[0]; } + private class IntersectingSetContainer { + Set set = null; + boolean initialized = false; + + void intersect(@NotNull Set toIntersect) { + initialized = true; + set = set == null ? toIntersect : Sets.intersection(set, toIntersect); + } + } + /** * For each filter in the query, return a set of patient ids that match. The order of these sets in the * returned list of sets does not matter and cannot currently be tied back to the filter that generated @@ -196,26 +209,27 @@ protected Set applyBooleanLogic(List> filteredIdSets) { * @return */ protected List> idSetsForEachFilter(Query query) { - final ArrayList> filteredIdSets = new ArrayList<>(); + IntersectingSetContainer ids = new IntersectingSetContainer<>(); try { - query.getAllAnyRecordOf().forEach(anyRecordOfFilterList -> { - addIdSetsForAnyRecordOf(anyRecordOfFilterList, filteredIdSets); - }); - addIdSetsForRequiredFields(query, filteredIdSets); - addIdSetsForNumericFilters(query, filteredIdSets); - addIdSetsForCategoryFilters(query, filteredIdSets); + for (List anyRecordOfFilterList : query.getAllAnyRecordOf()) { + ids = addIdSetsForAnyRecordOf(anyRecordOfFilterList, ids); + } + ids = addIdSetsForRequiredFields(query, ids); + ids = addIdSetsForNumericFilters(query, ids); + ids = addIdSetsForCategoryFilters(query, ids); } catch (InvalidCacheLoadException e) { log.warn("Invalid query supplied: " + e.getLocalizedMessage()); - filteredIdSets.add(new HashSet<>()); // if an invalid path is supplied, no patients should match. + return List.of(new HashSet<>()); } + + //AND logic to make sure all patients match each filter - if(filteredIdSets.size()>1) { - List> processedFilteredIdSets = new ArrayList<>(List.of(applyBooleanLogic(filteredIdSets))); - return addIdSetsForVariantInfoFilters(query, processedFilteredIdSets); + if (ids.initialized) { + return addIdSetsForVariantInfoFilters(query, new ArrayList<>(List.of(ids.set))); } else { - return addIdSetsForVariantInfoFilters(query, filteredIdSets); + return addIdSetsForVariantInfoFilters(query, new ArrayList<>()); } } @@ -249,22 +263,23 @@ public TreeSet getPatientSubsetForQuery(Query query) { return idList; } - private void addIdSetsForRequiredFields(Query query, ArrayList> filteredIdSets) { + private IntersectingSetContainer addIdSetsForRequiredFields(Query query, IntersectingSetContainer filteredIdSets) { if(!query.getRequiredFields().isEmpty()) { VariantBucketHolder bucketCache = new VariantBucketHolder<>(); - filteredIdSets.addAll(query.getRequiredFields().parallelStream().map(path->{ - if(VariantUtils.pathIsVariantSpec(path)) { + query.getRequiredFields().parallelStream().map(path -> { + if (VariantUtils.pathIsVariantSpec(path)) { TreeSet patientsInScope = new TreeSet<>(); - addIdSetsForVariantSpecCategoryFilters(new String[]{"0/1","1/1"}, path, patientsInScope, bucketCache); + addIdSetsForVariantSpecCategoryFilters(new String[]{"0/1", "1/1"}, path, patientsInScope, bucketCache); return patientsInScope; } else { - return new TreeSet(getCube(path).keyBasedIndex()); + return (Set) new TreeSet(getCube(path).keyBasedIndex()); } - }).collect(Collectors.toSet())); + }).forEach(filteredIdSets::intersect); } + return filteredIdSets; } - private void addIdSetsForAnyRecordOf(List anyRecordOfFilters, ArrayList> filteredIdSets) { + private IntersectingSetContainer addIdSetsForAnyRecordOf(List anyRecordOfFilters, IntersectingSetContainer filteredIdSets) { if(!anyRecordOfFilters.isEmpty()) { VariantBucketHolder bucketCache = new VariantBucketHolder<>(); Set anyRecordOfPatientSet = anyRecordOfFilters.parallelStream().flatMap(path -> { @@ -281,35 +296,37 @@ private void addIdSetsForAnyRecordOf(List anyRecordOfFilters, ArrayList< } } }).collect(Collectors.toSet()); - filteredIdSets.add(anyRecordOfPatientSet); + filteredIdSets.intersect(anyRecordOfPatientSet); } + return filteredIdSets; } - private void addIdSetsForNumericFilters(Query query, ArrayList> filteredIdSets) { + private IntersectingSetContainer addIdSetsForNumericFilters(Query query, IntersectingSetContainer filteredIdSets) { if(!query.getNumericFilters().isEmpty()) { - filteredIdSets.addAll((Set>)(query.getNumericFilters().entrySet().parallelStream().map(entry->{ - return (TreeSet)(getCube(entry.getKey()).getKeysForRange(entry.getValue().getMin(), entry.getValue().getMax())); - }).collect(Collectors.toSet()))); + query.getNumericFilters().entrySet().parallelStream().map(entry-> { + return (Set)(getCube(entry.getKey()).getKeysForRange(entry.getValue().getMin(), entry.getValue().getMax())); + }).forEach(filteredIdSets::intersect); } + return filteredIdSets; } - private void addIdSetsForCategoryFilters(Query query, ArrayList> filteredIdSets) { - if(!query.getCategoryFilters().isEmpty()) { - VariantBucketHolder bucketCache = new VariantBucketHolder<>(); - Set> idsThatMatchFilters = query.getCategoryFilters().entrySet().parallelStream().map(entry->{ - Set ids = new TreeSet<>(); - if(VariantUtils.pathIsVariantSpec(entry.getKey())) { - addIdSetsForVariantSpecCategoryFilters(entry.getValue(), entry.getKey(), ids, bucketCache); - } else { - String[] categoryFilter = entry.getValue(); - for(String category : categoryFilter) { - ids.addAll(getCube(entry.getKey()).getKeysForValue(category)); - } + private IntersectingSetContainer addIdSetsForCategoryFilters( + Query query, IntersectingSetContainer startingIds + ) { + VariantBucketHolder bucketCache = new VariantBucketHolder<>(); + query.getCategoryFilters().entrySet().parallelStream().map(entry->{ + Set ids = new TreeSet<>(); + if(VariantUtils.pathIsVariantSpec(entry.getKey())) { + addIdSetsForVariantSpecCategoryFilters(entry.getValue(), entry.getKey(), ids, bucketCache); + } else { + String[] categoryFilter = entry.getValue(); + for(String category : categoryFilter) { + ids.addAll(getCube(entry.getKey()).getKeysForValue(category)); } - return ids; - }).collect(Collectors.toSet()); - filteredIdSets.addAll(idsThatMatchFilters); - } + } + return ids; + }).forEach(startingIds::intersect); + return startingIds; } private void addIdSetsForVariantSpecCategoryFilters(String[] zygosities, String key, Set ids, VariantBucketHolder bucketCache) { diff --git a/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/AsyncResult.java b/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/AsyncResult.java index e4434abf..acc13d11 100644 --- a/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/AsyncResult.java +++ b/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/AsyncResult.java @@ -1,6 +1,7 @@ package edu.harvard.hms.dbmi.avillach.hpds.processing; import java.io.IOException; +import java.nio.file.Path; import java.util.concurrent.ExecutorService; import java.util.concurrent.RejectedExecutionException; @@ -131,5 +132,9 @@ public void enqueue() { public int compareTo(AsyncResult o) { return this.query.getId().compareTo(o.query.getId()); } + + public Path getTempFilePath() { + return stream.getTempFilePath(); + } } diff --git a/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/ResultStoreStream.java b/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/ResultStoreStream.java index 5bb23d81..8f71dbb6 100644 --- a/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/ResultStoreStream.java +++ b/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/ResultStoreStream.java @@ -1,6 +1,7 @@ package edu.harvard.hms.dbmi.avillach.hpds.processing; import java.io.*; +import java.nio.file.Path; import java.util.ArrayList; import java.util.List; import java.util.TreeMap; @@ -225,4 +226,8 @@ public long estimatedSize() { return tempFile.length(); } + public Path getTempFilePath() { + return Path.of(tempFile.getAbsolutePath()); + } + } diff --git a/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/TimeseriesProcessor.java b/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/TimeseriesProcessor.java index ffd7233c..dee9246e 100644 --- a/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/TimeseriesProcessor.java +++ b/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/TimeseriesProcessor.java @@ -83,7 +83,7 @@ public void runQuery(Query query, AsyncResult result) { * @throws IOException */ private void exportTimeData(Query query, AsyncResult result, TreeSet idList) throws IOException { - + log.info("Starting export for time series data of query {} (HPDS ID {})", query.getPicSureId(), query.getId()); Set exportedConceptPaths = new HashSet(); //get a list of all fields mentioned in the query; export all data associated with any included field List pathList = new LinkedList(); diff --git a/service/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/service/PicSureService.java b/service/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/service/PicSureService.java index 12287521..28baf105 100644 --- a/service/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/service/PicSureService.java +++ b/service/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/service/PicSureService.java @@ -11,7 +11,10 @@ import javax.ws.rs.core.Response.ResponseBuilder; import javax.ws.rs.core.Response.Status; +import edu.harvard.hms.dbmi.avillach.hpds.data.query.ResultType; +import edu.harvard.hms.dbmi.avillach.hpds.service.filesharing.FileSharingService; import edu.harvard.hms.dbmi.avillach.hpds.service.util.Paginator; +import edu.harvard.hms.dbmi.avillach.hpds.service.util.QueryDecorator; import org.apache.http.entity.ContentType; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -27,13 +30,13 @@ import edu.harvard.dbmi.avillach.domain.*; import edu.harvard.dbmi.avillach.service.IResourceRS; -import edu.harvard.dbmi.avillach.util.UUIDv5; import edu.harvard.hms.dbmi.avillach.hpds.crypto.Crypto; import edu.harvard.hms.dbmi.avillach.hpds.data.genotype.FileBackedByteIndexedInfoStore; import edu.harvard.hms.dbmi.avillach.hpds.data.phenotype.ColumnMeta; import edu.harvard.hms.dbmi.avillach.hpds.data.query.Query; import edu.harvard.hms.dbmi.avillach.hpds.processing.*; import org.springframework.stereotype.Component; +import org.springframework.web.bind.annotation.RequestBody; @Path("PIC-SURE") @Produces("application/json") @@ -42,13 +45,17 @@ public class PicSureService implements IResourceRS { @Autowired public PicSureService(QueryService queryService, TimelineProcessor timelineProcessor, CountProcessor countProcessor, - VariantListProcessor variantListProcessor, AbstractProcessor abstractProcessor, Paginator paginator) { + VariantListProcessor variantListProcessor, AbstractProcessor abstractProcessor, + Paginator paginator, FileSharingService fileSystemService, QueryDecorator queryDecorator + ) { this.queryService = queryService; this.timelineProcessor = timelineProcessor; this.countProcessor = countProcessor; this.variantListProcessor = variantListProcessor; this.abstractProcessor = abstractProcessor; this.paginator = paginator; + this.fileSystemService = fileSystemService; + this.queryDecorator = queryDecorator; Crypto.loadDefaultKey(); } @@ -68,6 +75,10 @@ public PicSureService(QueryService queryService, TimelineProcessor timelineProce private final Paginator paginator; + private final FileSharingService fileSystemService; + + private final QueryDecorator queryDecorator; + private static final String QUERY_METADATA_FIELD = "queryMetadata"; private static final int RESPONSE_CACHE_SIZE = 50; @@ -216,7 +227,8 @@ private QueryStatus convertToQueryStatus(AsyncResult entity) { status.setStatus(entity.status.toPicSureStatus()); Map metadata = new HashMap(); - metadata.put("picsureQueryId", UUIDv5.UUIDFromString(entity.query.toString())); + queryDecorator.setId(entity.query); + metadata.put("picsureQueryId", entity.query.getId()); status.setResultMetadata(metadata); return status; } @@ -250,6 +262,56 @@ public Response queryResult(@PathParam("resourceQueryId") UUID queryId, QueryReq } } + @POST + @Path("/write/{dataType}") + public Response writeQueryResult( + @RequestBody() Query query, @PathParam("dataType") String datatype + ) { + if (query.getExpectedResultType() != ResultType.DATAFRAME_TIMESERIES) { + return Response + .status(400, "The write endpoint only writes time series dataframes. Fix result type.") + .build(); + } + String hpdsQueryID; + try { + QueryStatus queryStatus = convertToQueryStatus(queryService.runQuery(query)); + String status = queryStatus.getResourceStatus(); + hpdsQueryID = queryStatus.getResourceResultId(); + while ("RUNNING".equalsIgnoreCase(status) || "PENDING".equalsIgnoreCase(status)) { + Thread.sleep(10000); // Yea, this is not restful. Sorry. + status = convertToQueryStatus(queryService.getStatusFor(hpdsQueryID)).getResourceStatus(); + } + } catch (ClassNotFoundException | IOException | InterruptedException e) { + log.warn("Error waiting for response", e); + return Response.serverError().build(); + } + + AsyncResult result = queryService.getResultFor(hpdsQueryID); + // the queryResult has this DIY retry logic that blocks a system thread. + // I'm not going to do that here. If the service can't find it, you get a 404. + // Retry it client side. + if (result == null) { + return Response.status(404).build(); + } + if (result.status == AsyncResult.Status.ERROR) { + return Response.status(500).build(); + } + if (result.status != AsyncResult.Status.SUCCESS) { + return Response.status(503).build(); // 503 = unavailable + } + + // at least for now, this is going to block until we finish writing + // Not very restful, but it will make this API very easy to consume + boolean success = false; + query.setId(hpdsQueryID); + if ("phenotypic".equals(datatype)) { + success = fileSystemService.createPhenotypicData(query); + } else if ("genomic".equals(datatype)) { + success = fileSystemService.createGenomicData(query); + } + return success ? Response.ok().build() : Response.serverError().build(); + } + @POST @Path("/query/{resourceQueryId}/status") @Override @@ -266,7 +328,7 @@ public Response queryFormat(QueryRequest resultRequest) { return Response.ok().entity(convertIncomingQuery(resultRequest).toString()).build(); } catch (IOException e) { return Response.ok() - .entity("An error occurred formatting the query for display: " + e.getLocalizedMessage()).build(); + .entity("An error occurred formatting the query for display: " + e.getLocalizedMessage()).build(); } } @@ -276,7 +338,7 @@ public Response queryFormat(QueryRequest resultRequest) { public Response querySync(QueryRequest resultRequest) { if (Crypto.hasKey(Crypto.DEFAULT_KEY_NAME)) { try { - return _querySync(resultRequest); + return submitQueryAndWaitForCompletion(resultRequest); } catch (IOException e) { log.error("IOException caught: ", e); return Response.serverError().build(); @@ -305,7 +367,7 @@ public PaginatedSearchResult searchGenomicConceptValues( return paginator.paginate(matchingValues, page, size); } - private Response _querySync(QueryRequest resultRequest) throws IOException { + private Response submitQueryAndWaitForCompletion(QueryRequest resultRequest) throws IOException { Query incomingQuery; incomingQuery = convertIncomingQuery(resultRequest); log.info("Query Converted"); @@ -324,6 +386,7 @@ private Response _querySync(QueryRequest resultRequest) throws IOException { case DATAFRAME: case SECRET_ADMIN_DATAFRAME: + case DATAFRAME_TIMESERIES: case DATAFRAME_MERGED: QueryStatus status = query(resultRequest); while (status.getResourceStatus().equalsIgnoreCase("RUNNING") @@ -386,6 +449,7 @@ private Response _querySync(QueryRequest resultRequest) throws IOException { } private ResponseBuilder queryOkResponse(Object obj, Query incomingQuery) { - return Response.ok(obj).header(QUERY_METADATA_FIELD, UUIDv5.UUIDFromString(incomingQuery.toString())); + queryDecorator.setId(incomingQuery); + return Response.ok(obj).header(QUERY_METADATA_FIELD, incomingQuery.getId()); } } diff --git a/service/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/service/QueryService.java b/service/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/service/QueryService.java index 54481f59..fe120904 100644 --- a/service/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/service/QueryService.java +++ b/service/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/service/QueryService.java @@ -7,12 +7,10 @@ import java.util.function.Predicate; import java.util.stream.Collectors; +import edu.harvard.hms.dbmi.avillach.hpds.service.util.QueryDecorator; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.google.common.collect.ImmutableMap; - -import edu.harvard.dbmi.avillach.util.UUIDv5; import edu.harvard.hms.dbmi.avillach.hpds.data.query.Query; import edu.harvard.hms.dbmi.avillach.hpds.processing.*; import edu.harvard.hms.dbmi.avillach.hpds.processing.AsyncResult.Status; @@ -41,16 +39,21 @@ public class QueryService { private final QueryProcessor queryProcessor; private final TimeseriesProcessor timeseriesProcessor; private final CountProcessor countProcessor; + private final QueryDecorator queryDecorator; HashMap results = new HashMap<>(); @Autowired - public QueryService (AbstractProcessor abstractProcessor, QueryProcessor queryProcessor, TimeseriesProcessor timeseriesProcessor, CountProcessor countProcessor) { + public QueryService ( + AbstractProcessor abstractProcessor, QueryProcessor queryProcessor, TimeseriesProcessor timeseriesProcessor, + CountProcessor countProcessor, QueryDecorator queryDecorator + ) { this.abstractProcessor = abstractProcessor; this.queryProcessor = queryProcessor; this.timeseriesProcessor = timeseriesProcessor; this.countProcessor = countProcessor; + this.queryDecorator = queryDecorator; SMALL_JOB_LIMIT = getIntProp("SMALL_JOB_LIMIT"); SMALL_TASK_THREADS = getIntProp("SMALL_TASK_THREADS"); @@ -69,9 +72,9 @@ public QueryService (AbstractProcessor abstractProcessor, QueryProcessor queryPr public AsyncResult runQuery(Query query) throws ClassNotFoundException, IOException { // Merging fields from filters into selected fields for user validation of results - mergeFilterFieldsIntoSelectedFields(query); - - Collections.sort(query.getFields()); + List fields = query.getFields(); + Collections.sort(fields); + query.setFields(fields); AsyncResult result = initializeResult(query); @@ -120,37 +123,12 @@ private AsyncResult initializeResult(Query query) throws ClassNotFoundException, AsyncResult result = new AsyncResult(query, p.getHeaderRow(query)); result.status = AsyncResult.Status.PENDING; result.queuedTime = System.currentTimeMillis(); - result.id = UUIDv5.UUIDFromString(query.toString()).toString(); + queryDecorator.setId(query); + result.id = query.getId(); result.processor = p; - query.setId(result.id); results.put(result.id, result); return result; } - - - private void mergeFilterFieldsIntoSelectedFields(Query query) { - LinkedHashSet fields = new LinkedHashSet<>(); - fields.addAll(query.getFields()); - if(!query.getCategoryFilters().isEmpty()) { - Set categoryFilters = new TreeSet(query.getCategoryFilters().keySet()); - Set toBeRemoved = new TreeSet(); - for(String categoryFilter : categoryFilters) { - System.out.println("In : " + categoryFilter); - if(VariantUtils.pathIsVariantSpec(categoryFilter)) { - toBeRemoved.add(categoryFilter); - } - } - categoryFilters.removeAll(toBeRemoved); - for(String categoryFilter : categoryFilters) { - System.out.println("Out : " + categoryFilter); - } - fields.addAll(categoryFilters); - } - fields.addAll(query.getAnyRecordOf()); - fields.addAll(query.getRequiredFields()); - fields.addAll(query.getNumericFilters().keySet()); - query.setFields(fields); - } private boolean ensureAllFieldsExist(Query query) { TreeSet allFields = new TreeSet<>(); diff --git a/service/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/service/filesharing/FileSharingService.java b/service/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/service/filesharing/FileSharingService.java new file mode 100644 index 00000000..47b3ea0a --- /dev/null +++ b/service/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/service/filesharing/FileSharingService.java @@ -0,0 +1,52 @@ +package edu.harvard.hms.dbmi.avillach.hpds.service.filesharing; + +import edu.harvard.hms.dbmi.avillach.hpds.data.query.Query; +import edu.harvard.hms.dbmi.avillach.hpds.data.query.ResultType; +import edu.harvard.hms.dbmi.avillach.hpds.processing.AsyncResult; +import edu.harvard.hms.dbmi.avillach.hpds.processing.VariantListProcessor; +import edu.harvard.hms.dbmi.avillach.hpds.service.QueryService; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.stereotype.Service; + +import java.io.IOException; +import java.time.Duration; +import java.time.temporal.ChronoUnit; + +/** + * Used for sharing data. Given a query, this service will write + * phenotypic and genomic data into a directory + */ +@Service +public class FileSharingService { + + private static final Logger LOG = LoggerFactory.getLogger(FileSharingService.class); + + @Autowired + private QueryService queryService; + + @Autowired + private FileSystemService fileWriter; + + @Autowired + private VariantListProcessor variantListProcessor; + + public boolean createPhenotypicData(Query query) { + AsyncResult result = queryService.getResultFor(query.getId()); + if (result == null || result.status != AsyncResult.Status.SUCCESS) { + return false; + } + return fileWriter.writeResultToFile("phenotypic_data.csv", result, query.getPicSureId()); + } + + public boolean createGenomicData(Query query) { + try { + String vcf = variantListProcessor.runVcfExcerptQuery(query, true); + return fileWriter.writeResultToFile("genomic_data.tsv", vcf, query.getPicSureId()); + } catch (IOException e) { + LOG.error("Error running genomic query", e); + return false; + } + } +} diff --git a/service/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/service/filesharing/FileSystemConfig.java b/service/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/service/filesharing/FileSystemConfig.java new file mode 100644 index 00000000..1448c8f2 --- /dev/null +++ b/service/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/service/filesharing/FileSystemConfig.java @@ -0,0 +1,30 @@ +package edu.harvard.hms.dbmi.avillach.hpds.service.filesharing; + +import org.springframework.beans.factory.annotation.Value; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; + +import java.nio.file.Path; + +@Configuration +public class FileSystemConfig { + @Value("${file_sharing_root:/gic_query_results/}") + private String fileSharingRootDir; + + @Value("${enable_file_sharing:false}") + private boolean enableFileSharing; + + @Bean() + Path sharingRoot() { + if (!enableFileSharing) { + return Path.of("/dev/null"); + } + + Path path = Path.of(fileSharingRootDir); + if (!path.toFile().exists()) { + throw new RuntimeException(fileSharingRootDir + " DNE."); + } + + return path; + } +} diff --git a/service/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/service/filesharing/FileSystemService.java b/service/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/service/filesharing/FileSystemService.java new file mode 100644 index 00000000..7a0f9a77 --- /dev/null +++ b/service/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/service/filesharing/FileSystemService.java @@ -0,0 +1,90 @@ +package edu.harvard.hms.dbmi.avillach.hpds.service.filesharing; + +import edu.harvard.hms.dbmi.avillach.hpds.processing.AsyncResult; +import org.apache.commons.io.IOUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.beans.factory.annotation.Value; +import org.springframework.stereotype.Service; + +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.nio.file.Files; +import java.nio.file.Path; + +import static java.nio.file.StandardCopyOption.REPLACE_EXISTING; + +@Service +public class FileSystemService { + + private static final Logger LOG = LoggerFactory.getLogger(FileSystemService.class); + + @Autowired + private Path sharingRoot; + + @Value("${enable_file_sharing:false}") + private boolean enableFileSharing; + + public boolean writeResultToFile(String fileName, AsyncResult result, String id) { + if (Files.exists(result.getTempFilePath())) { + LOG.info("A temp file already exists for query {}. Moving that rather than rewriting.", id); + return moveFile(fileName, result.getTempFilePath(), id); + } + result.stream.open(); + return writeStreamToFile(fileName, result.stream, id); + } + + public boolean writeResultToFile(String fileName, String result, String id) { + return writeStreamToFile(fileName, new ByteArrayInputStream(result.getBytes()), id); + } + + + private boolean moveFile(String destinationName, Path sourceFile, String queryId) { + if (!enableFileSharing) { + LOG.warn("Attempted to write query result to file while file sharing is disabled. No-op."); + return false; + } + + Path dirPath = Path.of(sharingRoot.toString(), queryId); + Path filePath = Path.of(sharingRoot.toString(), queryId, destinationName); + + try { + LOG.info("Moving query {} to file: {}", queryId, filePath); + makeDirIfDNE(dirPath); + Path result = Files.move(sourceFile, filePath, REPLACE_EXISTING); + return Files.exists(result); + } catch (IOException e) { + LOG.error("Error moving.", e); + return false; + } + } + + private boolean writeStreamToFile(String fileName, InputStream content, String queryId) { + if (!enableFileSharing) { + LOG.warn("Attempted to write query result to file while file sharing is disabled. No-op."); + return false; + } + + Path dirPath = Path.of(sharingRoot.toString(), queryId); + Path filePath = Path.of(sharingRoot.toString(), queryId, fileName); + + try { + LOG.info("Writing query {} to file: {}", queryId, filePath); + makeDirIfDNE(dirPath); + return Files.copy(content, filePath) > 0; + } catch (IOException e) { + LOG.error("Error writing result.", e); + return false; + } finally { + IOUtils.closeQuietly(content); + } + } + + private synchronized void makeDirIfDNE(Path dirPath) throws IOException { + if (!Files.exists(dirPath)) { + Files.createDirectory(dirPath); + } + } +} diff --git a/service/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/service/util/QueryDecorator.java b/service/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/service/util/QueryDecorator.java new file mode 100644 index 00000000..ed3d3c36 --- /dev/null +++ b/service/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/service/util/QueryDecorator.java @@ -0,0 +1,58 @@ +package edu.harvard.hms.dbmi.avillach.hpds.service.util; + +import edu.harvard.dbmi.avillach.util.UUIDv5; +import edu.harvard.hms.dbmi.avillach.hpds.data.query.Query; +import edu.harvard.hms.dbmi.avillach.hpds.processing.VariantUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.springframework.stereotype.Component; + +import java.util.*; + +@Component +public class QueryDecorator { + private static final Logger LOG = LoggerFactory.getLogger(QueryDecorator.class); + + public void setId(Query query) { + query.setId(""); // the id is included in the toString + // I clear it here to keep the ID setting stable for any query + // of identical structure and content + + // Some places where we call toString, we call mergeFilterFieldsIntoSelectedFields + // first. This can mutate the query, resulting in shifting UUIDs + // To stabilize things, we're always going to call that, and shift the logic here + mergeFilterFieldsIntoSelectedFields(query); + + // we also sort things sometimes + List fields = query.getFields(); + Collections.sort(fields); + query.setFields(fields); + + String id = UUIDv5.UUIDFromString(query.toString()).toString(); + query.setId(id); + } + + public void mergeFilterFieldsIntoSelectedFields(Query query) { + LinkedHashSet fields = new LinkedHashSet<>(query.getFields()); + + if(!query.getCategoryFilters().isEmpty()) { + Set categoryFilters = new TreeSet<>(query.getCategoryFilters().keySet()); + Set toBeRemoved = new TreeSet<>(); + for(String categoryFilter : categoryFilters) { + LOG.debug("In : {}", categoryFilter); + if(VariantUtils.pathIsVariantSpec(categoryFilter)) { + toBeRemoved.add(categoryFilter); + } + } + categoryFilters.removeAll(toBeRemoved); + for(String categoryFilter : categoryFilters) { + LOG.debug("Out : {}", categoryFilter); + } + fields.addAll(categoryFilters); + } + fields.addAll(query.getAnyRecordOf()); + fields.addAll(query.getRequiredFields()); + fields.addAll(query.getNumericFilters().keySet()); + query.setFields(fields); + } +} diff --git a/service/src/test/java/edu/harvard/hms/dbmi/avillach/hpds/service/filesharing/FileSharingServiceTest.java b/service/src/test/java/edu/harvard/hms/dbmi/avillach/hpds/service/filesharing/FileSharingServiceTest.java new file mode 100644 index 00000000..9550b82c --- /dev/null +++ b/service/src/test/java/edu/harvard/hms/dbmi/avillach/hpds/service/filesharing/FileSharingServiceTest.java @@ -0,0 +1,92 @@ +package edu.harvard.hms.dbmi.avillach.hpds.service.filesharing; + +import edu.harvard.hms.dbmi.avillach.hpds.data.query.Query; +import edu.harvard.hms.dbmi.avillach.hpds.processing.AsyncResult; +import edu.harvard.hms.dbmi.avillach.hpds.processing.VariantListProcessor; +import edu.harvard.hms.dbmi.avillach.hpds.service.QueryService; +import org.junit.Assert; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.mockito.InjectMocks; +import org.mockito.Mock; +import org.mockito.Mockito; +import org.mockito.junit.MockitoJUnitRunner; + +import java.io.IOException; + +@RunWith(MockitoJUnitRunner.class) +public class FileSharingServiceTest { + + @Mock + QueryService queryService; + + @Mock + FileSystemService fileWriter; + + @Mock + VariantListProcessor variantListProcessor; + + @InjectMocks + FileSharingService subject; + + @Test + public void shouldCreatePhenotypicData() { + Query query = new Query(); + query.setId("my-id"); + query.setPicSureId("my-ps-id"); + AsyncResult result = new AsyncResult(query, new String[]{}); + result.status = AsyncResult.Status.SUCCESS; + + Mockito.when(queryService.getResultFor("my-id")) + .thenReturn(result); + Mockito.when(fileWriter.writeResultToFile("phenotypic_data.csv", result, "my-ps-id")) + .thenReturn(true); + + boolean actual = subject.createPhenotypicData(query); + + Assert.assertTrue(actual); + } + + @Test + public void shouldNotCreatePhenotypicData() { + Query query = new Query(); + query.setId("my-id"); + query.setPicSureId("my-ps-id"); + AsyncResult result = new AsyncResult(query, new String[]{}); + result.status = AsyncResult.Status.ERROR; + + Mockito.when(queryService.getResultFor("my-id")) + .thenReturn(result); + + boolean actual = subject.createPhenotypicData(query); + + Assert.assertFalse(actual); + } + + @Test + public void shouldCreateGenomicData() throws IOException { + Query query = new Query(); + query.setPicSureId("my-id"); + String vcf = "lol lets put the whole vcf in a string"; + Mockito.when(variantListProcessor.runVcfExcerptQuery(query, true)) + .thenReturn(vcf); + Mockito.when(fileWriter.writeResultToFile("genomic_data.tsv", vcf, "my-id")) + .thenReturn(true); + + boolean actual = subject.createGenomicData(query); + + Assert.assertTrue(actual); + } + + @Test + public void shouldNotCreateGenomicData() throws IOException { + Query query = new Query(); + query.setPicSureId("my-id"); + Mockito.when(variantListProcessor.runVcfExcerptQuery(query, true)) + .thenThrow(new IOException("oh no!")); + + boolean actual = subject.createGenomicData(query); + + Assert.assertFalse(actual); + } +} \ No newline at end of file diff --git a/service/src/test/java/edu/harvard/hms/dbmi/avillach/hpds/service/filesharing/FileSystemServiceTest.java b/service/src/test/java/edu/harvard/hms/dbmi/avillach/hpds/service/filesharing/FileSystemServiceTest.java new file mode 100644 index 00000000..bddff644 --- /dev/null +++ b/service/src/test/java/edu/harvard/hms/dbmi/avillach/hpds/service/filesharing/FileSystemServiceTest.java @@ -0,0 +1,41 @@ +package edu.harvard.hms.dbmi.avillach.hpds.service.filesharing; + +import org.junit.Assert; +import org.junit.Test; +import org.springframework.test.util.ReflectionTestUtils; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; + +public class FileSystemServiceTest { + + @Test + public void shouldWriteToFile() throws IOException { + Path dir = Files.createTempDirectory("my-upload-dir"); + FileSystemService subject = new FileSystemService(); + ReflectionTestUtils.setField(subject, "sharingRoot", dir); + ReflectionTestUtils.setField(subject, "enableFileSharing", true); + String fileContent = "I just got an ad that tried to sell a baguette with moz, dressing, " + + "and tomatoes as a healthy lunch, and that's just so far from the truth that it's bugging me. " + + "Like, come on. It's bread and cheese and oil. I don't care how fresh the tomatoes are."; + + boolean actual = subject.writeResultToFile("out.tsv", fileContent, "my-id"); + String actualContent = Files.readString(dir.resolve("my-id/out.tsv")); + + Assert.assertTrue(actual); + Assert.assertEquals(fileContent, actualContent); + } + + @Test + public void shouldNotWriteToFile() throws IOException { + Path dir = Files.createTempDirectory("my-upload-dir"); + FileSystemService subject = new FileSystemService(); + ReflectionTestUtils.setField(subject, "sharingRoot", dir); + ReflectionTestUtils.setField(subject, "enableFileSharing", false); + + boolean actual = subject.writeResultToFile("out.tsv", "", "my-id"); + + Assert.assertFalse(actual); + } +} \ No newline at end of file