From 1919f66b8cb0b9cc52f7edf61c3bbfc8dcf5d16e Mon Sep 17 00:00:00 2001 From: Luke Sikina Date: Tue, 23 Jan 2024 11:10:03 -0500 Subject: [PATCH] [ALS-5755] Switch time series processor to ISO timestamps - Make service that does this - Isolate time series logic a bit more - Tests --- .../hpds/processing/AbstractProcessor.java | 2 +- .../TimeSeriesConversionService.java | 18 ++++++++++++ .../{ => timeseries}/TimeseriesProcessor.java | 28 +++++++++++++++---- .../TimeSeriesConversionServiceTest.java | 21 ++++++++++++++ .../avillach/hpds/service/QueryService.java | 1 + 5 files changed, 63 insertions(+), 7 deletions(-) create mode 100644 processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/timeseries/TimeSeriesConversionService.java rename processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/{ => timeseries}/TimeseriesProcessor.java (83%) create mode 100644 processing/src/test/java/edu/harvard/hms/dbmi/avillach/hpds/processing/timeseries/TimeSeriesConversionServiceTest.java diff --git a/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/AbstractProcessor.java b/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/AbstractProcessor.java index 8276e3bb..7a1a5063 100644 --- a/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/AbstractProcessor.java +++ b/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/AbstractProcessor.java @@ -672,7 +672,7 @@ protected PhenoCube getCube(String path) { * Useful for federated pic-sure's where there are fewer * guarantees about concept paths. */ - protected Optional> nullableGetCube(String path) { + public Optional> nullableGetCube(String path) { try { return Optional.ofNullable(store.get(path)); } catch (InvalidCacheLoadException | ExecutionException e) { diff --git a/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/timeseries/TimeSeriesConversionService.java b/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/timeseries/TimeSeriesConversionService.java new file mode 100644 index 00000000..f26228c9 --- /dev/null +++ b/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/timeseries/TimeSeriesConversionService.java @@ -0,0 +1,18 @@ +package edu.harvard.hms.dbmi.avillach.hpds.processing.timeseries; + +import org.springframework.stereotype.Service; + +import java.text.SimpleDateFormat; +import java.time.Instant; +import java.time.format.DateTimeFormatter; +import java.util.Date; +import java.util.TimeZone; + +@Service +public class TimeSeriesConversionService { + + public String toISOString(Long unixTimeStamp) { + Instant instant = Instant.ofEpochMilli(unixTimeStamp); + return DateTimeFormatter.ISO_INSTANT.format(instant); + } +} diff --git a/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/TimeseriesProcessor.java b/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/timeseries/TimeseriesProcessor.java similarity index 83% rename from processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/TimeseriesProcessor.java rename to processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/timeseries/TimeseriesProcessor.java index 2eabdf1d..c0a6b9b0 100644 --- a/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/TimeseriesProcessor.java +++ b/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/timeseries/TimeseriesProcessor.java @@ -1,9 +1,13 @@ -package edu.harvard.hms.dbmi.avillach.hpds.processing; +package edu.harvard.hms.dbmi.avillach.hpds.processing.timeseries; import java.io.FileNotFoundException; import java.io.IOException; import java.util.*; +import edu.harvard.hms.dbmi.avillach.hpds.processing.AbstractProcessor; +import edu.harvard.hms.dbmi.avillach.hpds.processing.AsyncResult; +import edu.harvard.hms.dbmi.avillach.hpds.processing.HpdsProcessor; +import edu.harvard.hms.dbmi.avillach.hpds.processing.QueryProcessor; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -36,14 +40,16 @@ public class TimeseriesProcessor implements HpdsProcessor { private Logger log = LoggerFactory.getLogger(QueryProcessor.class); private AbstractProcessor abstractProcessor; + private final TimeSeriesConversionService conversionService; private final String ID_CUBE_NAME; private final int ID_BATCH_SIZE; private final int CACHE_SIZE; @Autowired - public TimeseriesProcessor(AbstractProcessor abstractProcessor) { + public TimeseriesProcessor(AbstractProcessor abstractProcessor, TimeSeriesConversionService conversionService) { this.abstractProcessor = abstractProcessor; + this.conversionService = conversionService; // todo: handle these via spring annotations CACHE_SIZE = Integer.parseInt(System.getProperty("CACHE_SIZE", "100")); ID_BATCH_SIZE = Integer.parseInt(System.getProperty("ID_BATCH_SIZE", "0")); @@ -115,14 +121,24 @@ private void addDataForConcepts(Collection pathList, Set exporte if (cube.isStringType()) { KeyAndValue keyAndValue = (KeyAndValue) kvObj; // "PATIENT_NUM","CONCEPT_PATH","NVAL_NUM","TVAL_CHAR","TIMESTAMP" - String[] entryData = { keyAndValue.getKey().toString(), conceptPath, "", keyAndValue.getValue(), - keyAndValue.getTimestamp().toString() }; + String[] entryData = { + keyAndValue.getKey().toString(), + conceptPath, + "", + keyAndValue.getValue(), + conversionService.toISOString(keyAndValue.getTimestamp()) + }; dataEntries.add(entryData); } else { // numeric KeyAndValue keyAndValue = (KeyAndValue) kvObj; // "PATIENT_NUM","CONCEPT_PATH","NVAL_NUM","TVAL_CHAR","TIMESTAMP" - String[] entryData = { keyAndValue.getKey().toString(), conceptPath, - keyAndValue.getValue().toString(), "", keyAndValue.getTimestamp().toString() }; + String[] entryData = { + keyAndValue.getKey().toString(), + conceptPath, + keyAndValue.getValue().toString(), + "", + conversionService.toISOString(keyAndValue.getTimestamp()) + }; dataEntries.add(entryData); } //batch exports so we don't take double memory (valuesForKeys + dataEntries could be a lot of data points) diff --git a/processing/src/test/java/edu/harvard/hms/dbmi/avillach/hpds/processing/timeseries/TimeSeriesConversionServiceTest.java b/processing/src/test/java/edu/harvard/hms/dbmi/avillach/hpds/processing/timeseries/TimeSeriesConversionServiceTest.java new file mode 100644 index 00000000..c09a369c --- /dev/null +++ b/processing/src/test/java/edu/harvard/hms/dbmi/avillach/hpds/processing/timeseries/TimeSeriesConversionServiceTest.java @@ -0,0 +1,21 @@ +package edu.harvard.hms.dbmi.avillach.hpds.processing.timeseries; + +import org.junit.Test; + +import java.util.TimeZone; + +import static org.junit.Assert.assertEquals; + + +public class TimeSeriesConversionServiceTest { + + TimeSeriesConversionService subject = new TimeSeriesConversionService(); + + @Test + public void shouldConvertToIsoString() { + String actual = subject.toISOString(0L); + String expected = "1970-01-01T00:00:00Z"; + + assertEquals(expected, actual); + } +} \ No newline at end of file diff --git a/service/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/service/QueryService.java b/service/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/service/QueryService.java index fe120904..c1b69c3b 100644 --- a/service/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/service/QueryService.java +++ b/service/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/service/QueryService.java @@ -7,6 +7,7 @@ import java.util.function.Predicate; import java.util.stream.Collectors; +import edu.harvard.hms.dbmi.avillach.hpds.processing.timeseries.TimeseriesProcessor; import edu.harvard.hms.dbmi.avillach.hpds.service.util.QueryDecorator; import org.slf4j.Logger; import org.slf4j.LoggerFactory;