diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/readthreading/AbstractReadThreadingGraph.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/readthreading/AbstractReadThreadingGraph.java index 241a50a7ace..e8ac59f8719 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/readthreading/AbstractReadThreadingGraph.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/readthreading/AbstractReadThreadingGraph.java @@ -9,7 +9,6 @@ import htsjdk.samtools.util.Locatable; import org.apache.commons.lang3.tuple.Pair; import org.broadinstitute.gatk.nativebindings.smithwaterman.SWOverhangStrategy; -import org.broadinstitute.hellbender.exceptions.GATKException; import org.broadinstitute.hellbender.tools.walkers.haplotypecaller.Kmer; import org.broadinstitute.hellbender.tools.walkers.haplotypecaller.graphs.BaseGraph; import org.broadinstitute.hellbender.tools.walkers.haplotypecaller.graphs.KmerSearchableGraph; @@ -493,19 +492,20 @@ private Pair bestPrefixMatch(final List cigarEle int readIdx = path2.length - 1; // NOTE: this only works when the last cigar element has a sufficient number of M bases, so no indels within min-mismatches of the edge. + cigarLoop: for (final CigarElement ce : Lists.reverse(cigarElements)) { if (!(ce.getOperator().consumesReadBases() && ce.getOperator().consumesReferenceBases())) { break; } for (int j = 0; j < ce.getLength(); j++, refIdx--, readIdx--) { if (path1[refIdx] != path2[readIdx]) { - break; + break cigarLoop; } } } final int matches = path2.length - 1 - readIdx; - return matches < minMismatchingBases ? Pair.of(-1,-1) : Pair.of(readIdx, refIdx); + return matches < minMismatchingBases ? Pair.of(-1,-1) : Pair.of(refIdx, readIdx); } /** @@ -608,7 +608,7 @@ int mergeDanglingHeadLegacy(final DanglingChainMergeHelper danglingHeadMergeResu // but we can manipulate the dangling path if we need to if (indexesToMerge >= danglingHeadMergeResult.danglingPath.size() && - !extendDanglingPathAgainstReference(danglingHeadMergeResult, indexesToMerge - danglingHeadMergeResult.danglingPath.size() + 2)) { + !extendDanglingPathAgainstReference(danglingHeadMergeResult, indexesToMerge - danglingHeadMergeResult.danglingPath.size() + 2, elements)) { return 0; } @@ -643,11 +643,11 @@ int mergeDanglingHead(final DanglingChainMergeHelper danglingHeadMergeResult) { // but we can manipulate the dangling path if we need to if ( indexesToMerge.getValue() >= danglingHeadMergeResult.danglingPath.size() && - ! extendDanglingPathAgainstReference(danglingHeadMergeResult, indexesToMerge.getValue() - danglingHeadMergeResult.danglingPath.size() + 2) ) { + ! extendDanglingPathAgainstReference(danglingHeadMergeResult, indexesToMerge.getValue() - danglingHeadMergeResult.danglingPath.size() + 2, elements) ) { return 0; } - addEdge(danglingHeadMergeResult.referencePath.get(indexesToMerge.getKey()), danglingHeadMergeResult.danglingPath.get(indexesToMerge.getValue()), ((MyEdgeFactory)getEdgeFactory()).createEdge(false, 1)); + addEdge(danglingHeadMergeResult.referencePath.get(indexesToMerge.getKey() + 1), danglingHeadMergeResult.danglingPath.get(indexesToMerge.getValue()), ((MyEdgeFactory)getEdgeFactory()).createEdge(false, 1)); return 1; } @@ -898,10 +898,11 @@ private int getMaxMismatchesLegacy(final int lengthOfDanglingBranch) { return maxMismatchesInDanglingHead > 0 ? maxMismatchesInDanglingHead : Math.max(1, (lengthOfDanglingBranch / kmerSize)); } - private boolean extendDanglingPathAgainstReference(final DanglingChainMergeHelper danglingHeadMergeResult, final int numNodesToExtend) { + private boolean extendDanglingPathAgainstReference(final DanglingChainMergeHelper danglingHeadMergeResult, final int numNodesToExtend, List elements) { final int indexOfLastDanglingNode = danglingHeadMergeResult.danglingPath.size() - 1; - final int indexOfRefNodeToUse = indexOfLastDanglingNode + numNodesToExtend; + final int offsetForRefEndToDanglingEnd = elements.stream().mapToInt(ce -> (ce.getOperator().consumesReferenceBases()? ce.getLength() : 0) - (ce.getOperator().consumesReadBases()? ce.getLength() : 0)).sum(); + final int indexOfRefNodeToUse = indexOfLastDanglingNode + offsetForRefEndToDanglingEnd + numNodesToExtend; if (indexOfRefNodeToUse >= danglingHeadMergeResult.referencePath.size()) { return false; } diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/readthreading/ReadThreadingGraphUnitTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/readthreading/ReadThreadingGraphUnitTest.java index cd13cd90cc5..61d09dca997 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/readthreading/ReadThreadingGraphUnitTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/readthreading/ReadThreadingGraphUnitTest.java @@ -502,6 +502,12 @@ public Object[][] makeDanglingHeadsData() { tests.add(new Object[]{"XXXXXXXAACCGGTTACGT", "AYCGGTTACGT", "7M", true, -1}); // very little data tests.add(new Object[]{"XXXXXXXAACCGGTTACGT", "YCCGGTTACGT", "6M", true, -1}); // begins in mismatch + //Testing new behavior with indels long enough to confuse the code + tests.add(new Object[]{"XXXXXXXAACBBBBBBCGGTTACGT", "AACCGGTTACGT", "5M6D3M", true, 1}); // long deletion + tests.add(new Object[]{"XXXXXXXAACCBBBBBBGGTTACGT", "ATCCGGTTACGT", "5M6D4M", true, 1}); // long deletion plus close snp + tests.add(new Object[]{"XXXXXXXAACCGGTTACGT", "XAACYYYYYYYCGGTTACGT", "5M7I4M", true, 1}); // 7 base insertion + tests.add(new Object[]{"XXXXXXXAACCGGTTACGT", "XTACYYYYYYYCGGTTACGT", "5M7I4M", true, 1}); // 7 base with snp (NOTE: This triggers extendDanglingPathAgainstReference but should fail unless the cigar is used to predict the correct reference vertex to use for merging) + return tests.toArray(new Object[][]{}); } @@ -554,6 +560,13 @@ public void testDanglingHeads(final String ref, final SeqGraph seqGraph = rtgraph.toSequenceGraph(); final List> paths = new GraphBasedKBestHaplotypeFinder<>(seqGraph, seqGraph.getReferenceSourceVertex(), seqGraph.getReferenceSinkVertex()).findBestHaplotypes(); Assert.assertEquals(paths.size(), shouldBeMerged ? 2 : 1); + + // Asserting the contents of the path are reasonable + final List pathsConverted = paths.stream().map(p -> new String(p.getBases())).collect(Collectors.toList()); + Assert.assertTrue(pathsConverted.contains(ref)); + if (shouldBeMerged == true) { + Assert.assertTrue(pathsConverted.stream().anyMatch(p -> p.contains(alt))); + } } /** diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2IntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2IntegrationTest.java index 447e0e1210b..0a7b912c5db 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2IntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2IntegrationTest.java @@ -135,7 +135,7 @@ public void testDreamTumorNormal(final File tumor, final Optional normal, final List normals = normal.isPresent() ? Collections.singletonList(normal.get()) : Collections.emptyList(); runMutect2(Collections.singletonList(tumor), normals, unfilteredVcf, CHROMOSOME_20, b37Reference, Optional.of(GNOMAD), args -> args.addMask(mask).add(M2ArgumentCollection.F1R2_TAR_GZ_NAME, f1r2Counts), - args -> errorCorrectReads ? args.add(ReadThreadingAssemblerArgumentCollection.PILEUP_ERROR_CORRECTION_LOG_ODDS_NAME, 3.0) : args + args -> errorCorrectReads ? args.add(ReadThreadingAssemblerArgumentCollection.PILEUP_ERROR_CORRECTION_LOG_ODDS_NAME, 3.0) : args ); // verify that alleles contained in likelihoods matrix but dropped from somatic calls do not show up in annotations @@ -316,7 +316,7 @@ public void testDifferentAltsInTumorAndNormal() { Assert.assertTrue(altAllelesByPosition.get(10020042).basesMatch(Allele.ALT_C)); //tumor G->C, normal G->A Assert.assertTrue(altAllelesByPosition.get(10020124).basesMatch(Allele.ALT_G)); //tumor A->G, normal A->T } - + // test on an artificial bam with several contrived MNPs @Test public void testMnps() { @@ -762,7 +762,7 @@ public void testMitochondrialRefConf() { .filter(key -> variantMap.containsKey(key) && variantMap2.containsKey(key)) .collect(Collectors.toList()); Assert.assertFalse(refBlockKeys.isEmpty()); - + refBlockKeys.forEach(key -> Assert.assertTrue(onlyNonRefTlodsChange(variantMap.get(key), variantMap2.get(key), minAF))); } @@ -873,8 +873,8 @@ public void testFilteringHeaders() { @SafeVarargs final private void runMutect2(final List tumors, final List normals, final File output, - final String interval, final String reference, - final Optional gnomad, final Function... appendExtraArguments) { + final String interval, final String reference, + final Optional gnomad, final Function... appendExtraArguments) { final ArgumentsBuilder args = new ArgumentsBuilder() .addOutput(output) .addReference(reference); @@ -901,19 +901,19 @@ final private void runMutect2(final List tumors, final List normals, @SafeVarargs final private void runMutect2(final File tumor, final File normal, final File output, final String interval, final String reference, - final Optional gnomad, final Function... appendExtraArguments) { + final Optional gnomad, final Function... appendExtraArguments) { runMutect2(Collections.singletonList(tumor), Collections.singletonList(normal), output, interval, reference, gnomad, appendExtraArguments); } @SafeVarargs final private void runMutect2(final File tumor, final File output, final String interval, final String reference, - final Optional gnomad, final Function... appendExtraArguments) { + final Optional gnomad, final Function... appendExtraArguments) { runMutect2(Collections.singletonList(tumor), Collections.emptyList(), output, interval, reference, gnomad, appendExtraArguments); } @SafeVarargs final private void runFilterMutectCalls(final File unfilteredVcf, final File filteredVcf, final String reference, - final Function... appendExtraArguments) { + final Function... appendExtraArguments) { final ArgumentsBuilder args = new ArgumentsBuilder() .addVCF(unfilteredVcf) .addOutput(filteredVcf)