Skip to content

Commit

Permalink
Add strand stats and report experiment strandness - #3 #6
Browse files Browse the repository at this point in the history
  • Loading branch information
emi80 committed Oct 4, 2019
1 parent 2ae0bf8 commit 884aed4
Show file tree
Hide file tree
Showing 3 changed files with 278 additions and 3 deletions.
3 changes: 2 additions & 1 deletion process.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,12 @@ import (
"sync"
"time"

log "github.com/sirupsen/logrus"
"github.com/biogo/hts/bam"
"github.com/guigolab/bamstats/annotation"
"github.com/guigolab/bamstats/config"
"github.com/guigolab/bamstats/sam"
"github.com/guigolab/bamstats/stats"
log "github.com/sirupsen/logrus"
)

func init() {
Expand Down Expand Up @@ -148,6 +148,7 @@ func makeStatsMap(index *annotation.RtreeMap, cfg *config.Config) stats.Map {
m.Add(stats.NewCoverageStats(index, true))
}
m.Add(stats.NewIHECstats(index))
m.Add(stats.NewStrandStats(index))
}
return m
}
4 changes: 2 additions & 2 deletions process_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@ var (
"coverageUniq": "data/expected-coverage-uniq.json",
"rnaseq": "data/expected-rnaseq.json",
}
expectedMapLenCoverage = 3
expectedMapLenCoverageUniq = 4
expectedMapLenCoverage = 4
expectedMapLenCoverageUniq = 5
annotationFiles = []string{"data/coverage-test.bed", "data/coverage-test.gtf.gz", "data/coverage-test-shuffled.bed", "data/coverage-test-shuffled.gtf.gz"}
maxBuf = 1000000
reads = -1
Expand Down
274 changes: 274 additions & 0 deletions stats/strand.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,274 @@
package stats

import (
"math"

"github.com/guigolab/bamstats/annotation"
"github.com/guigolab/bamstats/sam"
)

var (
strandMap = map[string]int8{
"+": +1,
"-": -1,
}
threshold = 0.7
)

// SingleStrandStats represents strand statistics for single end reads
type SingleStrandStats struct {
MapPlusGenePlus uint64 `json:"++"`
MapPlusGeneMinus uint64 `json:"+-"`
MapMinusGeneMinus uint64 `json:"--"`
MapMinusGenePlus uint64 `json:"-+"`
}

// Update updates all counts from another StrandStats instance.
func (s *SingleStrandStats) Update(other *SingleStrandStats) {
s.MapPlusGenePlus += other.MapPlusGenePlus
s.MapPlusGeneMinus += other.MapPlusGeneMinus
s.MapMinusGenePlus += other.MapMinusGenePlus
s.MapMinusGeneMinus += other.MapMinusGeneMinus
}

// Total return the total sum of all counts
func (s *SingleStrandStats) Total() uint64 {
return s.MapPlusGenePlus +
s.MapPlusGeneMinus +
s.MapMinusGenePlus +
s.MapMinusGeneMinus
}

// PairedStrandStats represents strand statistics for paired end reads
type PairedStrandStats struct {
FirstMapPlusGenePlus uint64 `json:"1++"`
FirstMapPlusGeneMinus uint64 `json:"1+-"`
FirstMapMinusGeneMinus uint64 `json:"1--"`
FirstMapMinusGenePlus uint64 `json:"1-+"`
SecondMapPlusGenePlus uint64 `json:"2++"`
SecondMapPlusGeneMinus uint64 `json:"2+-"`
SecondMapMinusGeneMinus uint64 `json:"2--"`
SecondMapMinusGenePlus uint64 `json:"2-+"`
}

// Update updates all counts from another StrandStats instance.
func (s *PairedStrandStats) Update(other *PairedStrandStats) {
s.FirstMapPlusGenePlus += other.FirstMapPlusGenePlus
s.FirstMapPlusGeneMinus += other.FirstMapPlusGeneMinus
s.FirstMapMinusGenePlus += other.FirstMapMinusGenePlus
s.FirstMapMinusGeneMinus += other.FirstMapMinusGeneMinus
s.SecondMapPlusGenePlus += other.SecondMapPlusGenePlus
s.SecondMapPlusGeneMinus += other.SecondMapPlusGeneMinus
s.SecondMapMinusGenePlus += other.SecondMapMinusGenePlus
s.SecondMapMinusGeneMinus += other.SecondMapMinusGeneMinus
}

// Total returns the sum of all counts
func (s *PairedStrandStats) Total() uint64 {
return s.FirstMapPlusGenePlus +
s.FirstMapPlusGeneMinus +
s.FirstMapMinusGenePlus +
s.FirstMapMinusGeneMinus +
s.SecondMapPlusGenePlus +
s.SecondMapPlusGeneMinus +
s.SecondMapMinusGenePlus +
s.SecondMapMinusGeneMinus
}

// StrandStats represents strand statistics
type StrandStats struct {
Total uint64 `json:"total"`
spec1, spec2 float64
Failed float64 `json:"failed"`
Strandness string `json:"strandness"`
Reads *SingleStrandStats `json:"reads,omitempty"`
Pairs *PairedStrandStats `json:"pairs,omitempty"`
index *annotation.RtreeMap
}

// Collect collects strand statistics from a sam.Record.
func (s *StrandStats) Collect(record *sam.Record) {
NH, hasNH := record.Tag([]byte("NH"))
multiMap := hasNH && NH.Value().(uint8) > 1
if s.index == nil || !record.IsPrimary() || record.IsUnmapped() || record.IsDuplicate() || multiMap {
return
}
rtree := s.index.Get(record.Ref.Name())
if rtree == nil || rtree.Size() == 0 {
return
}
loc := annotation.NewLocation(record.Ref.Name(), record.Start(), record.End())
results := annotation.QueryIndex(rtree, loc.Start(), loc.End())

check := make(map[int8]uint8)
for _, r := range results {
if f, ok := r.(*annotation.Feature); ok {
start := math.Max(loc.Start(), f.Start())
end := math.Min(loc.End(), f.End())
if end <= start {
continue
}
f := r.(*annotation.Feature)
if f.Element() != "gene" {
continue
}
if record.Strand() == strandMap[f.Strand()] {
check[1]++
} else {
check[-1]++
}
}
}
if check[1] == 0 && check[-1] == 0 {
return
}
s.Total++
if check[1] > 0 {
if record.IsPaired() {
if s.Pairs == nil {
s.Pairs = &PairedStrandStats{}
}
if record.IsRead1() {
if record.Strand() > 0 {
s.Pairs.FirstMapPlusGenePlus++
}
if record.Strand() < 0 {
s.Pairs.FirstMapMinusGeneMinus++
}
}
if record.IsRead2() {
if record.Strand() > 0 {
s.Pairs.SecondMapPlusGenePlus++
}
if record.Strand() < 0 {
s.Pairs.SecondMapMinusGeneMinus++
}
}
} else {
if s.Reads == nil {
s.Reads = &SingleStrandStats{}
}
if record.Strand() > 0 {
s.Reads.MapPlusGenePlus++
}
if record.Strand() < 0 {
s.Reads.MapMinusGeneMinus++
}
}
} else {
if check[-1] > 0 {
if record.IsPaired() {
if s.Pairs == nil {
s.Pairs = &PairedStrandStats{}
}
if record.IsRead1() {
if record.Strand() > 0 {
s.Pairs.FirstMapPlusGeneMinus++
}
if record.Strand() < 0 {
s.Pairs.FirstMapMinusGenePlus++
}
}
if record.IsRead2() {
if record.Strand() > 0 {
s.Pairs.SecondMapPlusGeneMinus++
}
if record.Strand() < 0 {
s.Pairs.SecondMapMinusGenePlus++
}
}
} else {
if s.Reads == nil {
s.Reads = &SingleStrandStats{}
}
if record.Strand() > 0 {
s.Reads.MapPlusGeneMinus++
}
if record.Strand() < 0 {
s.Reads.MapMinusGenePlus++
}
}
}
}
}

// Type returns the stats type
func (s *StrandStats) Type() string {
return "strand"
}

// Update updates all counts from another Stats instance.
func (s *StrandStats) Update(other Stats) {
if other, ok := other.(*StrandStats); ok {
if other.Reads != nil {
if s.Reads == nil {
s.Reads = &SingleStrandStats{}
}
s.Reads.Update(other.Reads)
}
if other.Pairs != nil {
if s.Pairs == nil {
s.Pairs = &PairedStrandStats{}
}
s.Pairs.Update(other.Pairs)
}
s.Total += other.Total
s.Finalize()
}
}

// Merge update counts from a channel of Stats instances.
func (s *StrandStats) Merge(others chan Stats) {
for other := range others {
s.Update(other)
}
}

// Finalize updates dependent counts of a CoverageStats instance.
func (s *StrandStats) Finalize() {
if s.Reads == nil && s.Pairs == nil {
return
}
var spec1, spec2, total uint64
if s.Reads != nil {
spec1 = s.Reads.MapPlusGenePlus + s.Reads.MapMinusGeneMinus
spec2 = s.Reads.MapPlusGeneMinus + s.Reads.MapMinusGenePlus
total = s.Reads.Total()
} else {
if s.Pairs != nil {
spec1 = s.Pairs.FirstMapPlusGenePlus + s.Pairs.FirstMapMinusGeneMinus + s.Pairs.SecondMapPlusGeneMinus + s.Pairs.SecondMapMinusGenePlus
spec2 = s.Pairs.FirstMapPlusGeneMinus + s.Pairs.FirstMapMinusGenePlus + s.Pairs.SecondMapPlusGenePlus + s.Pairs.SecondMapMinusGeneMinus
total = s.Pairs.Total()
}
}
s.spec1 = float64(spec1) / float64(total)
s.spec2 = float64(spec2) / float64(total)
s.Failed = 1 - s.spec1 - s.spec2
if s.Reads != nil {
if s.spec1 > threshold {
s.Strandness = "SENSE"
} else {
if s.spec2 > threshold {
s.Strandness = "ANTISENSE"
}
}
} else {
if s.Pairs != nil {
if s.spec1 > threshold {
s.Strandness = "MATE1_SENSE"
} else {
if s.spec2 > threshold {
s.Strandness = "MATE2_SENSE"
}
}
}
}
}

// NewStrandStats returns a new instance of a StrandStats object
func NewStrandStats(index *annotation.RtreeMap) *StrandStats {
return &StrandStats{
index: index,
Strandness: "NONE",
}
}

0 comments on commit 884aed4

Please sign in to comment.