Skip to content

Commit

Permalink
[ALS-5065] Write phenotypic and genomic data to file
Browse files Browse the repository at this point in the history
- We need to be able to upload hpds query results to aws
- Most of this work will be done by another service, but
we need hpds to write the results to a file prior to upload
  • Loading branch information
Luke Sikina committed Nov 16, 2023
1 parent 47b827b commit c4a248d
Show file tree
Hide file tree
Showing 6 changed files with 312 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
import javax.ws.rs.core.Response.ResponseBuilder;
import javax.ws.rs.core.Response.Status;

import edu.harvard.hms.dbmi.avillach.hpds.service.filesharing.FileSharingService;
import edu.harvard.hms.dbmi.avillach.hpds.service.filesharing.FileSystemService;
import edu.harvard.hms.dbmi.avillach.hpds.service.util.Paginator;
import org.apache.http.entity.ContentType;
import org.slf4j.Logger;
Expand All @@ -34,6 +36,7 @@
import edu.harvard.hms.dbmi.avillach.hpds.data.query.Query;
import edu.harvard.hms.dbmi.avillach.hpds.processing.*;
import org.springframework.stereotype.Component;
import org.springframework.web.bind.annotation.RequestBody;

@Path("PIC-SURE")
@Produces("application/json")
Expand All @@ -42,13 +45,16 @@ public class PicSureService implements IResourceRS {

@Autowired
public PicSureService(QueryService queryService, TimelineProcessor timelineProcessor, CountProcessor countProcessor,
VariantListProcessor variantListProcessor, AbstractProcessor abstractProcessor, Paginator paginator) {
VariantListProcessor variantListProcessor, AbstractProcessor abstractProcessor,
Paginator paginator, FileSharingService fileSystemService
) {
this.queryService = queryService;
this.timelineProcessor = timelineProcessor;
this.countProcessor = countProcessor;
this.variantListProcessor = variantListProcessor;
this.abstractProcessor = abstractProcessor;
this.paginator = paginator;
this.fileSystemService = fileSystemService;
Crypto.loadDefaultKey();
}

Expand All @@ -68,6 +74,8 @@ public PicSureService(QueryService queryService, TimelineProcessor timelineProce

private final Paginator paginator;

private final FileSharingService fileSystemService;

private static final String QUERY_METADATA_FIELD = "queryMetadata";
private static final int RESPONSE_CACHE_SIZE = 50;

Expand Down Expand Up @@ -250,6 +258,36 @@ public Response queryResult(@PathParam("resourceQueryId") UUID queryId, QueryReq
}
}

@POST
@Path("/write/{dataType}")
public Response writeQueryResult(
@RequestBody() Query query, @PathParam("dataType") String datatype
) {
AsyncResult result = queryService.getResultFor(query.getId());
// the queryResult has this DIY retry logic that blocks a system thread.
// I'm not going to do that here. If the service can't find it, you get a 404.
// Retry it client side.
if (result == null) {
return Response.status(404).build();
}
if (result.status == AsyncResult.Status.ERROR) {
return Response.status(500).build();
}
if (result.status != AsyncResult.Status.SUCCESS) {
return Response.status(503).build(); // 503 = unavailable
}

// at least for now, this is going to block until we finish writing
// Not very restful, but it will make this API very easy to consume
boolean success = false;
if ("phenotypic".equals(datatype)) {
success = fileSystemService.createPhenotypicData(query);
} else if ("genomic".equals(datatype)) {
success = fileSystemService.createGenomicData(query);
}
return success ? Response.ok().build() : Response.serverError().build();
}

@POST
@Path("/query/{resourceQueryId}/status")
@Override
Expand All @@ -266,7 +304,7 @@ public Response queryFormat(QueryRequest resultRequest) {
return Response.ok().entity(convertIncomingQuery(resultRequest).toString()).build();
} catch (IOException e) {
return Response.ok()
.entity("An error occurred formatting the query for display: " + e.getLocalizedMessage()).build();
.entity("An error occurred formatting the query for display: " + e.getLocalizedMessage()).build();
}
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
package edu.harvard.hms.dbmi.avillach.hpds.service.filesharing;

import edu.harvard.hms.dbmi.avillach.hpds.data.query.Query;
import edu.harvard.hms.dbmi.avillach.hpds.data.query.ResultType;
import edu.harvard.hms.dbmi.avillach.hpds.processing.AsyncResult;
import edu.harvard.hms.dbmi.avillach.hpds.processing.VariantListProcessor;
import edu.harvard.hms.dbmi.avillach.hpds.service.QueryService;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;

import java.io.IOException;
import java.time.Duration;
import java.time.temporal.ChronoUnit;

/**
* Used for sharing data. Given a query, this service will write
* phenotypic and genomic data into a directory
*/
@Service
public class FileSharingService {

private static final Logger LOG = LoggerFactory.getLogger(FileSharingService.class);

@Autowired
private QueryService queryService;

@Autowired
private FileSystemService fileWriter;

@Autowired
private VariantListProcessor variantListProcessor;

public boolean createPhenotypicData(Query query) {
AsyncResult result = queryService.getResultFor(query.getId());
if (result.status != AsyncResult.Status.SUCCESS) {
return false;
}
return fileWriter.writeResultToFile("phenotypic_data.tsv", query.getId(), result);
}

public boolean createGenomicData(Query query) {
try {
String vcf = variantListProcessor.runVcfExcerptQuery(query, true);
return fileWriter.writeResultToFile("genomic_data.tsv", query.getId(), vcf, query.getId());
} catch (IOException e) {
LOG.error("Error running genomic query", e);
return false;
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
package edu.harvard.hms.dbmi.avillach.hpds.service.filesharing;

import org.springframework.beans.factory.annotation.Value;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;

import java.nio.file.Path;

@Configuration
public class FileSystemConfig {
@Value("${file_sharing_root:/gic_query_results/}")
private String fileSharingRootDir;

@Value("${enable_file_sharing:false}")
private boolean enableFileSharing;

@Bean()
Path sharingRoot() {
if (!enableFileSharing) {
return Path.of("/dev/null");
}

Path path = Path.of(fileSharingRootDir);
if (!path.toFile().exists()) {
throw new RuntimeException(fileSharingRootDir + " DNE.");
}

return path;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
package edu.harvard.hms.dbmi.avillach.hpds.service.filesharing;

import edu.harvard.hms.dbmi.avillach.hpds.processing.AsyncResult;
import org.apache.commons.io.IOUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Service;

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.file.Files;
import java.nio.file.Path;

@Service
public class FileSystemService {

private static final Logger LOG = LoggerFactory.getLogger(FileSystemService.class);

@Autowired
private Path sharingRoot;

@Value("${enable_file_sharing:false}")
private boolean enableFileSharing;

public boolean writeResultToFile(String fileName, String directory, AsyncResult result) {
result.stream.open();
return writeStreamToFile(fileName, result.stream, result.id);
}

public boolean writeResultToFile(String fileName, String directory, String result, String id) {
return writeStreamToFile(fileName, new ByteArrayInputStream(result.getBytes()), id);
}

private boolean writeStreamToFile(String fileName, InputStream content, String queryId) {
if (!enableFileSharing) {
LOG.warn("Attempted to write query result to file while file sharing is disabled. No-op.");
return false;
}

Path dirPath = Path.of(sharingRoot.toString(), queryId);
Path filePath = Path.of(sharingRoot.toString(), queryId, fileName);

try {
LOG.info("Writing query {} to file: {}", queryId, filePath);
if (!Files.exists(dirPath)) {
Files.createDirectory(dirPath);
}
return Files.copy(content, filePath) > 0;
} catch (IOException e) {
LOG.error("Error writing result.", e);
return false;
} finally {
IOUtils.closeQuietly(content);
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
package edu.harvard.hms.dbmi.avillach.hpds.service.filesharing;

import edu.harvard.hms.dbmi.avillach.hpds.data.query.Query;
import edu.harvard.hms.dbmi.avillach.hpds.processing.AsyncResult;
import edu.harvard.hms.dbmi.avillach.hpds.processing.VariantListProcessor;
import edu.harvard.hms.dbmi.avillach.hpds.service.QueryService;
import org.junit.Assert;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.mockito.InjectMocks;
import org.mockito.Mock;
import org.mockito.Mockito;
import org.mockito.junit.MockitoJUnitRunner;

import java.io.IOException;

@RunWith(MockitoJUnitRunner.class)
public class FileSharingServiceTest {

@Mock
QueryService queryService;

@Mock
FileSystemService fileWriter;

@Mock
VariantListProcessor variantListProcessor;

@InjectMocks
FileSharingService subject;

@Test
public void shouldCreatePhenotypicData() {
Query query = new Query();
query.setId("my-id");
AsyncResult result = new AsyncResult(query, new String[]{});
result.status = AsyncResult.Status.SUCCESS;

Mockito.when(queryService.getResultFor("my-id"))
.thenReturn(result);
Mockito.when(fileWriter.writeResultToFile("phenotypic_data.tsv", "my-id", result))
.thenReturn(true);

boolean actual = subject.createPhenotypicData(query);

Assert.assertTrue(actual);
}

@Test
public void shouldNotCreatePhenotypicData() {
Query query = new Query();
query.setId("my-id");
AsyncResult result = new AsyncResult(query, new String[]{});
result.status = AsyncResult.Status.ERROR;

Mockito.when(queryService.getResultFor("my-id"))
.thenReturn(result);

boolean actual = subject.createPhenotypicData(query);

Assert.assertFalse(actual);
}

@Test
public void shouldCreateGenomicData() throws IOException {
Query query = new Query();
query.setId("my-id");
String vcf = "lol lets put the whole vcf in a string";
Mockito.when(variantListProcessor.runVcfExcerptQuery(query, true))
.thenReturn(vcf);
Mockito.when(fileWriter.writeResultToFile("genomic_data.tsv", "my-id", vcf, "my-id"))
.thenReturn(true);

boolean actual = subject.createGenomicData(query);

Assert.assertTrue(actual);
}

@Test
public void shouldNotCreateGenomicData() throws IOException {
Query query = new Query();
query.setId("my-id");
Mockito.when(variantListProcessor.runVcfExcerptQuery(query, true))
.thenThrow(new IOException("oh no!"));

boolean actual = subject.createGenomicData(query);

Assert.assertFalse(actual);
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
package edu.harvard.hms.dbmi.avillach.hpds.service.filesharing;

import org.junit.Assert;
import org.junit.Test;
import org.springframework.test.util.ReflectionTestUtils;

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;

public class FileSystemServiceTest {

@Test
public void shouldWriteToFile() throws IOException {
Path dir = Files.createTempDirectory("my-upload-dir");
FileSystemService subject = new FileSystemService();
ReflectionTestUtils.setField(subject, "sharingRoot", dir);
ReflectionTestUtils.setField(subject, "enableFileSharing", true);
String fileContent = "I just got an ad that tried to sell a baguette with moz, dressing, " +
"and tomatoes as a healthy lunch, and that's just so far from the truth that it's bugging me. " +
"Like, come on. It's bread and cheese and oil. I don't care how fresh the tomatoes are.";

boolean actual = subject.writeResultToFile("out.tsv", dir.toAbsolutePath().toString(), fileContent, "my-id");
String actualContent = Files.readString(dir.resolve("my-id/out.tsv"));

Assert.assertTrue(actual);
Assert.assertEquals(fileContent, actualContent);
}

@Test
public void shouldNotWriteToFile() throws IOException {
Path dir = Files.createTempDirectory("my-upload-dir");
FileSystemService subject = new FileSystemService();
ReflectionTestUtils.setField(subject, "sharingRoot", dir);
ReflectionTestUtils.setField(subject, "enableFileSharing", false);

boolean actual = subject.writeResultToFile("out.tsv", dir.toAbsolutePath().toString(), "", "my-id");

Assert.assertFalse(actual);
}
}

0 comments on commit c4a248d

Please sign in to comment.