Skip to content

Commit

Permalink
Fixed crashes in new dangling head merging code (#7086)
Browse files Browse the repository at this point in the history
  • Loading branch information
jamesemery authored Feb 19, 2021
1 parent ae06fb7 commit 8f6c736
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 16 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
import htsjdk.samtools.util.Locatable;
import org.apache.commons.lang3.tuple.Pair;
import org.broadinstitute.gatk.nativebindings.smithwaterman.SWOverhangStrategy;
import org.broadinstitute.hellbender.exceptions.GATKException;
import org.broadinstitute.hellbender.tools.walkers.haplotypecaller.Kmer;
import org.broadinstitute.hellbender.tools.walkers.haplotypecaller.graphs.BaseGraph;
import org.broadinstitute.hellbender.tools.walkers.haplotypecaller.graphs.KmerSearchableGraph;
Expand Down Expand Up @@ -493,19 +492,20 @@ private Pair<Integer, Integer> bestPrefixMatch(final List<CigarElement> cigarEle
int readIdx = path2.length - 1;

// NOTE: this only works when the last cigar element has a sufficient number of M bases, so no indels within min-mismatches of the edge.
cigarLoop:
for (final CigarElement ce : Lists.reverse(cigarElements)) {
if (!(ce.getOperator().consumesReadBases() && ce.getOperator().consumesReferenceBases())) {
break;
}
for (int j = 0; j < ce.getLength(); j++, refIdx--, readIdx--) {
if (path1[refIdx] != path2[readIdx]) {
break;
break cigarLoop;
}
}
}

final int matches = path2.length - 1 - readIdx;
return matches < minMismatchingBases ? Pair.of(-1,-1) : Pair.of(readIdx, refIdx);
return matches < minMismatchingBases ? Pair.of(-1,-1) : Pair.of(refIdx, readIdx);
}

/**
Expand Down Expand Up @@ -608,7 +608,7 @@ int mergeDanglingHeadLegacy(final DanglingChainMergeHelper danglingHeadMergeResu

// but we can manipulate the dangling path if we need to
if (indexesToMerge >= danglingHeadMergeResult.danglingPath.size() &&
!extendDanglingPathAgainstReference(danglingHeadMergeResult, indexesToMerge - danglingHeadMergeResult.danglingPath.size() + 2)) {
!extendDanglingPathAgainstReference(danglingHeadMergeResult, indexesToMerge - danglingHeadMergeResult.danglingPath.size() + 2, elements)) {
return 0;
}

Expand Down Expand Up @@ -643,11 +643,11 @@ int mergeDanglingHead(final DanglingChainMergeHelper danglingHeadMergeResult) {

// but we can manipulate the dangling path if we need to
if ( indexesToMerge.getValue() >= danglingHeadMergeResult.danglingPath.size() &&
! extendDanglingPathAgainstReference(danglingHeadMergeResult, indexesToMerge.getValue() - danglingHeadMergeResult.danglingPath.size() + 2) ) {
! extendDanglingPathAgainstReference(danglingHeadMergeResult, indexesToMerge.getValue() - danglingHeadMergeResult.danglingPath.size() + 2, elements) ) {
return 0;
}

addEdge(danglingHeadMergeResult.referencePath.get(indexesToMerge.getKey()), danglingHeadMergeResult.danglingPath.get(indexesToMerge.getValue()), ((MyEdgeFactory)getEdgeFactory()).createEdge(false, 1));
addEdge(danglingHeadMergeResult.referencePath.get(indexesToMerge.getKey() + 1), danglingHeadMergeResult.danglingPath.get(indexesToMerge.getValue()), ((MyEdgeFactory)getEdgeFactory()).createEdge(false, 1));

return 1;
}
Expand Down Expand Up @@ -898,10 +898,11 @@ private int getMaxMismatchesLegacy(final int lengthOfDanglingBranch) {
return maxMismatchesInDanglingHead > 0 ? maxMismatchesInDanglingHead : Math.max(1, (lengthOfDanglingBranch / kmerSize));
}

private boolean extendDanglingPathAgainstReference(final DanglingChainMergeHelper danglingHeadMergeResult, final int numNodesToExtend) {
private boolean extendDanglingPathAgainstReference(final DanglingChainMergeHelper danglingHeadMergeResult, final int numNodesToExtend, List<CigarElement> elements) {

final int indexOfLastDanglingNode = danglingHeadMergeResult.danglingPath.size() - 1;
final int indexOfRefNodeToUse = indexOfLastDanglingNode + numNodesToExtend;
final int offsetForRefEndToDanglingEnd = elements.stream().mapToInt(ce -> (ce.getOperator().consumesReferenceBases()? ce.getLength() : 0) - (ce.getOperator().consumesReadBases()? ce.getLength() : 0)).sum();
final int indexOfRefNodeToUse = indexOfLastDanglingNode + offsetForRefEndToDanglingEnd + numNodesToExtend;
if (indexOfRefNodeToUse >= danglingHeadMergeResult.referencePath.size()) {
return false;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -502,6 +502,12 @@ public Object[][] makeDanglingHeadsData() {
tests.add(new Object[]{"XXXXXXXAACCGGTTACGT", "AYCGGTTACGT", "7M", true, -1}); // very little data
tests.add(new Object[]{"XXXXXXXAACCGGTTACGT", "YCCGGTTACGT", "6M", true, -1}); // begins in mismatch

//Testing new behavior with indels long enough to confuse the code
tests.add(new Object[]{"XXXXXXXAACBBBBBBCGGTTACGT", "AACCGGTTACGT", "5M6D3M", true, 1}); // long deletion
tests.add(new Object[]{"XXXXXXXAACCBBBBBBGGTTACGT", "ATCCGGTTACGT", "5M6D4M", true, 1}); // long deletion plus close snp
tests.add(new Object[]{"XXXXXXXAACCGGTTACGT", "XAACYYYYYYYCGGTTACGT", "5M7I4M", true, 1}); // 7 base insertion
tests.add(new Object[]{"XXXXXXXAACCGGTTACGT", "XTACYYYYYYYCGGTTACGT", "5M7I4M", true, 1}); // 7 base insertion with snp (NOTE: This triggers extendDanglingPathAgainstReference but should fail unless the cigar is used to predict the correct reference vertex to use for merging)

return tests.toArray(new Object[][]{});
}

Expand Down Expand Up @@ -554,6 +560,13 @@ public void testDanglingHeads(final String ref,
final SeqGraph seqGraph = rtgraph.toSequenceGraph();
final List<KBestHaplotype<SeqVertex, BaseEdge>> paths = new GraphBasedKBestHaplotypeFinder<>(seqGraph, seqGraph.getReferenceSourceVertex(), seqGraph.getReferenceSinkVertex()).findBestHaplotypes();
Assert.assertEquals(paths.size(), shouldBeMerged ? 2 : 1);

// Asserting the contents of the path are reasonable
final List<String> pathsConverted = paths.stream().map(p -> new String(p.getBases())).collect(Collectors.toList());
Assert.assertTrue(pathsConverted.contains(ref));
if (shouldBeMerged == true) {
Assert.assertTrue(pathsConverted.stream().anyMatch(p -> p.contains(alt)));
}
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ public void testDreamTumorNormal(final File tumor, final Optional<File> normal,
final List<File> normals = normal.isPresent() ? Collections.singletonList(normal.get()) : Collections.emptyList();
runMutect2(Collections.singletonList(tumor), normals, unfilteredVcf, CHROMOSOME_20, b37Reference, Optional.of(GNOMAD),
args -> args.addMask(mask).add(M2ArgumentCollection.F1R2_TAR_GZ_NAME, f1r2Counts),
args -> errorCorrectReads ? args.add(ReadThreadingAssemblerArgumentCollection.PILEUP_ERROR_CORRECTION_LOG_ODDS_NAME, 3.0) : args
args -> errorCorrectReads ? args.add(ReadThreadingAssemblerArgumentCollection.PILEUP_ERROR_CORRECTION_LOG_ODDS_NAME, 3.0) : args
);

// verify that alleles contained in likelihoods matrix but dropped from somatic calls do not show up in annotations
Expand Down Expand Up @@ -316,7 +316,7 @@ public void testDifferentAltsInTumorAndNormal() {
Assert.assertTrue(altAllelesByPosition.get(10020042).basesMatch(Allele.ALT_C)); //tumor G->C, normal G->A
Assert.assertTrue(altAllelesByPosition.get(10020124).basesMatch(Allele.ALT_G)); //tumor A->G, normal A->T
}

// test on an artificial bam with several contrived MNPs
@Test
public void testMnps() {
Expand Down Expand Up @@ -762,7 +762,7 @@ public void testMitochondrialRefConf() {
.filter(key -> variantMap.containsKey(key) && variantMap2.containsKey(key))
.collect(Collectors.toList());
Assert.assertFalse(refBlockKeys.isEmpty());

refBlockKeys.forEach(key -> Assert.assertTrue(onlyNonRefTlodsChange(variantMap.get(key), variantMap2.get(key), minAF)));
}

Expand Down Expand Up @@ -873,8 +873,8 @@ public void testFilteringHeaders() {

@SafeVarargs
final private void runMutect2(final List<File> tumors, final List<File> normals, final File output,
final String interval, final String reference,
final Optional<File> gnomad, final Function<ArgumentsBuilder, ArgumentsBuilder>... appendExtraArguments) {
final String interval, final String reference,
final Optional<File> gnomad, final Function<ArgumentsBuilder, ArgumentsBuilder>... appendExtraArguments) {
final ArgumentsBuilder args = new ArgumentsBuilder()
.addOutput(output)
.addReference(reference);
Expand All @@ -901,19 +901,19 @@ final private void runMutect2(final List<File> tumors, final List<File> normals,

@SafeVarargs
final private void runMutect2(final File tumor, final File normal, final File output, final String interval, final String reference,
final Optional<File> gnomad, final Function<ArgumentsBuilder, ArgumentsBuilder>... appendExtraArguments) {
final Optional<File> gnomad, final Function<ArgumentsBuilder, ArgumentsBuilder>... appendExtraArguments) {
runMutect2(Collections.singletonList(tumor), Collections.singletonList(normal), output, interval, reference, gnomad, appendExtraArguments);
}

@SafeVarargs
final private void runMutect2(final File tumor, final File output, final String interval, final String reference,
final Optional<File> gnomad, final Function<ArgumentsBuilder, ArgumentsBuilder>... appendExtraArguments) {
final Optional<File> gnomad, final Function<ArgumentsBuilder, ArgumentsBuilder>... appendExtraArguments) {
runMutect2(Collections.singletonList(tumor), Collections.emptyList(), output, interval, reference, gnomad, appendExtraArguments);
}

@SafeVarargs
final private void runFilterMutectCalls(final File unfilteredVcf, final File filteredVcf, final String reference,
final Function<ArgumentsBuilder, ArgumentsBuilder>... appendExtraArguments) {
final Function<ArgumentsBuilder, ArgumentsBuilder>... appendExtraArguments) {
final ArgumentsBuilder args = new ArgumentsBuilder()
.addVCF(unfilteredVcf)
.addOutput(filteredVcf)
Expand Down

0 comments on commit 8f6c736

Please sign in to comment.