Skip to content

Commit

Permalink
Strand bias calculated only from supporting reads - read pairs are ex…
Browse files Browse the repository at this point in the history
…cluded
  • Loading branch information
d-cameron committed Aug 2, 2018
1 parent 3e96595 commit 4f61685
Show file tree
Hide file tree
Showing 3 changed files with 36 additions and 24 deletions.
15 changes: 9 additions & 6 deletions src/main/java/au/edu/wehi/idsv/AssemblyAttributes.java
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,12 @@
import au.edu.wehi.idsv.sam.ChimericAlignment;
import au.edu.wehi.idsv.sam.SamTags;
import au.edu.wehi.idsv.util.MessageThrottler;
import com.google.common.collect.Lists;
import com.google.common.collect.Range;
import com.google.common.collect.Streams;
import htsjdk.samtools.Cigar;
import htsjdk.samtools.CigarElement;
import htsjdk.samtools.CigarOperator;
import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.util.Log;
import org.apache.commons.lang3.ArrayUtils;

import java.util.*;
import java.util.stream.Collectors;
Expand Down Expand Up @@ -116,9 +113,12 @@ private static int maxReadLength(Collection<DirectedEvidence> support) {
.max()
.orElse(0);
}
private static float strandBias(Collection<DirectedEvidence> support) {
private static float readStrandBias(Collection<DirectedEvidence> fullSupport) {
List<DirectedEvidence> support = fullSupport.stream()
.filter(s -> s instanceof SingleReadEvidence)
.collect(Collectors.toList());
if (support.size() == 0) {
return 0;
return 0.5f;
}
return (float)support.stream()
.mapToDouble(e -> e.getStrandBias())
Expand Down Expand Up @@ -147,7 +147,7 @@ public static void annotateAssembly(ProcessingContext context, SAMRecord record,

record.setAttribute(SamTags.IS_ASSEMBLY, (byte)1);
record.setAttribute(SamTags.ASSEMBLY_MAX_READ_LENGTH, maxReadLength(support));
record.setAttribute(SamTags.ASSEMBLY_STRAND_BIAS, strandBias(support));
record.setAttribute(SamTags.ASSEMBLY_STRAND_BIAS, readStrandBias(support));
ensureUniqueEvidenceID(record.getReadName(), support);
// TODO: proper mapq model
record.setMappingQuality(maxLocalMapq(support));
Expand Down Expand Up @@ -260,6 +260,9 @@ public int getMaxQualPosition(Range<Integer> assemblyContigOffset, Set<Integer>
}
return bestPos;
}
public int getSupportingReadCount(Range<Integer> assemblyContigOffset, Set<Integer> supportingCategories, Set<AssemblyEvidenceSupport.SupportType> supportTypes) {
return (int)filterSupport(assemblyContigOffset, supportingCategories, supportTypes).count();
}
public int getSupportingReadCount(int assemblyContigOffset, Set<Integer> supportingCategories, Set<AssemblyEvidenceSupport.SupportType> supportTypes) {
return (int)filterSupport(Range.closed(assemblyContigOffset, assemblyContigOffset), supportingCategories, supportTypes).count();
}
Expand Down
31 changes: 21 additions & 10 deletions src/main/java/au/edu/wehi/idsv/StructuralVariationCallBuilder.java
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@
import htsjdk.samtools.util.Log;
import htsjdk.variant.variantcontext.VariantContextBuilder;

import static picard.fingerprint.DiploidHaplotype.aa;

public class StructuralVariationCallBuilder extends IdsvVariantContextBuilder {
private static final Log log = Log.getInstance(StructuralVariationCallBuilder.class);
private final ProcessingContext processContext;
Expand Down Expand Up @@ -328,18 +330,27 @@ public VariantContextDirectedEvidence make() {
attribute(VcfInfoAttributes.BREAKEND_ASSEMBLY_READPAIR_COUNT.attribute(), IntStream.of(basrp).sum());
attribute(VcfInfoAttributes.BREAKPOINT_VARIANT_FRAGMENTS.attribute(), IntStream.of(supportingBreakpointFragments).sum());
attribute(VcfInfoAttributes.BREAKEND_VARIANT_FRAGMENTS.attribute(), IntStream.of(supportingBreakendFragments).sum());

if (supportingBreakpoint.size() > 0) {
int totalSupport = supportingBreakpoint.stream().mapToInt(e -> e.constituentReads()).sum();
if (totalSupport != 0) {
attribute(VcfInfoAttributes.STRAND_BIAS.attribute(), supportingBreakpoint.stream().mapToDouble(e -> e.getStrandBias() * e.constituentReads()).sum() / totalSupport);
}
} else {
int totalSupport = supportingBreakend.stream().mapToInt(e -> e.constituentReads()).sum();
if (totalSupport != 0) {
attribute(VcfInfoAttributes.STRAND_BIAS.attribute(), supportingBreakend.stream().mapToDouble(e -> e.getStrandBias() * e.constituentReads()).sum() / totalSupport);

// Calculate strand bias purely from direct read support
int reads = 0;
float strandReads = 0;
for (DirectedEvidence de : Iterables.concat(supportingBreakpoint, supportingBreakend)) {
if (de instanceof SingleReadEvidence) {
SingleReadEvidence e = (SingleReadEvidence) de;
if (AssemblyAttributes.isAssembly(e)) {
AssemblyAttributes aa = aaLookup.get(de);
int asmReads = aa.getSupportingReadCount(null, null, ImmutableSet.of(AssemblyEvidenceSupport.SupportType.Read));
reads += asmReads;
strandReads += aa.getStrandBias() * asmReads;
} else {
reads++;
strandReads += e.getStrandBias();
}
}
}
if (reads > 0) {
attribute(VcfInfoAttributes.STRAND_BIAS.attribute(), strandReads / reads);
}
List<String> breakendIds = Stream.concat(Stream.concat(Stream.concat(supportingAS.stream(), supportingRAS.stream()), supportingCAS.stream()), supportingBAS.stream())
.map(e -> e.getSAMRecord().getReadName())
.distinct()
Expand Down
14 changes: 6 additions & 8 deletions src/main/java/au/edu/wehi/idsv/vcf/VcfInfoAttributes.java
Original file line number Diff line number Diff line change
Expand Up @@ -50,14 +50,12 @@ public enum VcfInfoAttributes {
BREAKEND_VARIANT_FRAGMENTS ("BVF", 1, VCFHeaderLineType.Integer, "Count of fragments providing breakend for the variant allele."),

CONFIDENCE_INTERVAL_REMOTE_BREAKEND_START_POSITION_KEY ("CIRPOS", 2, VCFHeaderLineType.Integer, "Confidence interval around remote breakend POS for imprecise variants"),
STRAND_BIAS ("SB", 1, VCFHeaderLineType.Float, "Strand bias of reads supporting the variant."
+ "1 indicates that breakend bases would be aligned to the positive strand if the reference was changed to the variant allele. "
+ "0 indicates that breakend bases would be aligned to the negative strand if the reference was changed to the variant allele. "
+ "For read alignments, this corresponds the strand of the read alignment. "
+ "A SR that is aligned to the positive strand will have a strand bias of 1. "
+ "For read pairs in FR orientation, this is the opposite strand to the local anchoring read alignment. "
+ "A RP that is aligned to the positive strand will have a strand bias of 0 because the mate is providing the support and is expected to align to the negative strand. "
+ "Note that reads both directly supporting the variant, and supporting via assembly will be double-counted"),
STRAND_BIAS ("SB", 1, VCFHeaderLineType.Float, "Strand bias of the reads supporting the variant."
+ "1 indicates that reads would be aligned to the positive strand if the reference was changed to the variant allele. "
+ "0 indicates that reads bases would be aligned to the negative strand if the reference was changed to the variant allele. "
+ "Strand bias is calculated purely from supporting reads and exclude read pair support since these are 100% strand bias. "
+ "Note that reads both directly supporting the variant, and supporting via assembly will be double-counted. "
+ "Both breakpoint and breakend supporting reads are included."),
SELF_INTERSECTING ("SELF", 1, VCFHeaderLineType.Flag, "Indicates a breakpoint is self-intersecting"),
SUPPORT_INTERVAL ("SI", VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.Integer, "Support interval offsets from breakend position in which at least one supporting read/read pair/assembly is mapped."),
REMOTE_SUPPORT_INTERVAL ("RSI", VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.Integer, "Support interval offsets of partner breakend."),
Expand Down

0 comments on commit 4f61685

Please sign in to comment.