diff --git a/src/main/java/org/mskcc/smile/service/impl/CmoLabelGeneratorServiceImpl.java b/src/main/java/org/mskcc/smile/service/impl/CmoLabelGeneratorServiceImpl.java index ba82b7e..78030d1 100644 --- a/src/main/java/org/mskcc/smile/service/impl/CmoLabelGeneratorServiceImpl.java +++ b/src/main/java/org/mskcc/smile/service/impl/CmoLabelGeneratorServiceImpl.java @@ -10,6 +10,7 @@ import java.util.List; import java.util.Map; import java.util.NoSuchElementException; +import java.util.Objects; import java.util.Set; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -780,18 +781,31 @@ private Set parseMatchingNucleicAcidCountersFromSampleLabels(String stA continue; } - Integer currentIncrement; - if (matcher.group(CMO_SAMPLE_NUCACID_COUNTER_GROUP) == null - || matcher.group(CMO_SAMPLE_NUCACID_COUNTER_GROUP).isEmpty()) { - currentIncrement = 1; - } else { - currentIncrement = Integer.valueOf(matcher.group(CMO_SAMPLE_NUCACID_COUNTER_GROUP)); + Integer currentIncrement = parseNucleicAcidCounterFromLabel(sample.getCmoSampleName()); + if (currentIncrement != null) { + nucAcidCountersByAltId.add(currentIncrement); } - nucAcidCountersByAltId.add(currentIncrement); } return nucAcidCountersByAltId; } + private Integer parseNucleicAcidCounterFromLabel(String cmoLabel) { + // if sample cmo label does not meet matcher criteria then skip + Matcher matcher = CMO_SAMPLE_ID_REGEX.matcher(cmoLabel); + if (!matcher.find()) { + return null; + } + + Integer currentIncrement; + if (matcher.group(CMO_SAMPLE_NUCACID_COUNTER_GROUP) == null + || matcher.group(CMO_SAMPLE_NUCACID_COUNTER_GROUP).isEmpty()) { + currentIncrement = 1; + } else { + currentIncrement = Integer.valueOf(matcher.group(CMO_SAMPLE_NUCACID_COUNTER_GROUP)); + } + return currentIncrement; + } + /** * Returns the nucleic acid increment. Counter will be a 2 digit integer value range * from 01-99 (values less < 10 are filled in with zeros '0' to preserve 2-digit format). @@ -812,6 +826,14 @@ private Integer resolveNextNucleicAcidIncrement(String primaryId, String stAbbre Set nucAcidCountersByAltId = parseMatchingNucleicAcidCountersFromSampleLabels(stAbbrev, nucAcidAbbrev, samplesByAltId); + // if primary id exists in the set of samples by alt id then store nuc acid counter for reference + Integer existingNucAcidCounter = null; + for (SampleMetadata s : samplesByAltId) { + if (s.getPrimaryId().equals(primaryId)) { + existingNucAcidCounter = parseNucleicAcidCounterFromLabel(s.getCmoSampleName()); + } + } + // easy scenario: length of matching samples given an alt id is 1 and sample matches the // primary id of the sample currently being interrogated then return nucleic acid counter as 1 if (samplesByAltId.size() == 1 && samplesByAltId.get(0).getPrimaryId().equals(primaryId)) { @@ -819,7 +841,7 @@ private Integer resolveNextNucleicAcidIncrement(String primaryId, String stAbbre } // for all other scenarios, resolve next consecutive counter from the parsed set of counters - return getNextNucleicAcidIncrement(nucAcidCountersByAltId); + return getNextNucleicAcidIncrement(nucAcidCountersByAltId, existingNucAcidCounter); } /** @@ -827,7 +849,7 @@ private Integer resolveNextNucleicAcidIncrement(String primaryId, String stAbbre * @param counters * @return Integer */ - private Integer getNextNucleicAcidIncrement(Set counters) { + private Integer getNextNucleicAcidIncrement(Set counters, Integer existingNucAcidCounter) { if (counters.isEmpty() || Collections.min(counters) != 1) { return 1; } @@ -839,11 +861,21 @@ private Integer getNextNucleicAcidIncrement(Set counters) { for (int i = 1; i < sortedCounters.size(); i++) { Integer currentCounter = sortedCounters.get(i); Integer prevCounter = sortedCounters.get(i - 1); + + // if the difference between the counters is > 1 then return the prev counter + 1 if ((currentCounter - prevCounter) > 1) { return prevCounter + 1; - } else { - refCounter = currentCounter; } + + // if the current counter matches the existing nuc acid counter + // then return since the current counter is +1 from the prev counter + // and therefore is already the next consecutive integer + if (existingNucAcidCounter != null && Objects.equals(existingNucAcidCounter, currentCounter)) { + return existingNucAcidCounter; + } + + // move onto the next counter in the list + refCounter = currentCounter; } return refCounter + 1; } diff --git a/src/test/java/org/mskcc/smile/CmoLabelGeneratorServiceTest.java b/src/test/java/org/mskcc/smile/CmoLabelGeneratorServiceTest.java index bc615d4..f3b17c7 100644 --- a/src/test/java/org/mskcc/smile/CmoLabelGeneratorServiceTest.java +++ b/src/test/java/org/mskcc/smile/CmoLabelGeneratorServiceTest.java @@ -9,6 +9,7 @@ import java.util.HashSet; import java.util.List; import java.util.Map; +import java.util.Objects; import java.util.Set; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; @@ -337,21 +338,17 @@ public void testChangeInSampleCounter() throws Exception { @Test public void testNextConsecutiveCounter() throws Exception { Set counters = new HashSet<>(Arrays.asList(1, 2, 7)); - Integer nextConsecutiveInt = getNextNucleicAcidIncrement(counters); - Assertions.assertEquals(3, nextConsecutiveInt); + Integer nextConsecutiveInt1 = getNextNucleicAcidIncrement(counters, null); + Assertions.assertEquals(3, nextConsecutiveInt1); + + Integer nextConsecutiveInt2 = getNextNucleicAcidIncrement(counters, 2); + Assertions.assertEquals(2, nextConsecutiveInt2); } - private Integer getNextNucleicAcidIncrement(Set counters) { - if (counters.isEmpty()) { + private Integer getNextNucleicAcidIncrement(Set counters, Integer existingNucAcidCounter) { + if (counters.isEmpty() || Collections.min(counters) != 1) { return 1; } - if (counters.size() == 1) { - if (Collections.min(counters) != 1) { - return 1; - } else { - return 2; - } - } List sortedCounters = Arrays.asList(counters.toArray(Integer[]::new)); Collections.sort(sortedCounters); @@ -360,11 +357,21 @@ private Integer getNextNucleicAcidIncrement(Set counters) { for (int i = 1; i < sortedCounters.size(); i++) { Integer currentCounter = sortedCounters.get(i); Integer prevCounter = sortedCounters.get(i - 1); + + // if the difference between the counters is > 1 then return the prev counter + 1 if ((currentCounter - prevCounter) > 1) { return prevCounter + 1; - } else { - refCounter = currentCounter; } + + // if the current counter matches the existing nuc acid counter + // then return since the current counter is +1 from the prev counter + // and therefore is already the next consecutive integer + if (existingNucAcidCounter != null && Objects.equals(existingNucAcidCounter, currentCounter)) { + return existingNucAcidCounter; + } + + // move onto the next counter in the list + refCounter = currentCounter; } return refCounter + 1; }