Skip to content
This repository has been archived by the owner on Jun 20, 2024. It is now read-only.

Commit

Permalink
- since there is no "call cacheing" I need to split this into several…
Browse files Browse the repository at this point in the history
… parts:

-- simulate and generate bams
-- downsample bam and contaminate bams
-- downsample markers and estimate contamination
  • Loading branch information
yfarjoun committed Feb 12, 2021
1 parent cb51e35 commit 90e526b
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 11 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -78,21 +78,21 @@ class ContaminateAndEvaluateVBID
val rando = new CopyPS_FromBedToVcf(in = vcf, out = out.resolve(prefix + vcf.getFileName + ".with.pl.vcf"),
dpHeader = dpHeader, pathToBed = makeBed.out)

val normalize = new LeftAlignAndTrimVariants(in = rando.out, out = out.resolve(prefix + vcf.getFileName + ".normalized.vcf"), ref = ref, splitMultiAlleic = Some(true))
val subsetToPL = new subsetToPL(in=rando.out, out=out.resolve(prefix + vcf.getFileName + ".subsetToPL.vcf"))
val normalize = new LeftAlignAndTrimVariants(in = subsetToPL.out, out = out.resolve(prefix + vcf.getFileName + ".normalized.vcf"), ref = ref, splitMultiAlleic = Some(true))
val index = new IndexVariants(in = normalize.out)
val simulate = new DWGSim(vcf = normalize.out, fasta = ref, outPrefix = out.resolve(prefix + vcf.getFileName + ".sim"), depth = coverage, coverageTarget = targets)
val bwa = new BwaMem(fastq = simulate.outputPairedFastq, ref = ref, maxThreads = 1, memory = Memory("2G"))
val tmpBam = out.resolve(prefix + vcf.getFileName + ".tmp.bam")
val sort = new SortSam(in = Io.StdIn, out = tmpBam, sortOrder = SortOrder.coordinate) with Configuration {
val bwa = new BwaMem(fastq = simulate.outputPairedFastq, out=Some(out.resolve(prefix + vcf.getFileName + ".tmp.bam")) ,
ref = ref, maxThreads = 1, memory = Memory("2G"))
val sort = new SortSam(in = bwa.out.get, out = out.resolve(prefix + vcf.getFileName + ".sorted.bam"), sortOrder = SortOrder.coordinate) with Configuration {
requires(Cores(1), Memory("2G"))
}

root ==> toTable ==> makeBed ==> rando ==> normalize ==> index ==> simulate ==> (bwa | sort) ==> makeBamsLoop
root ==> toTable ==> makeBed ==> rando ==> subsetToPL ==>
normalize ==> index ==> simulate ==> bwa ==> sort ==> makeBamsLoop


// ~/gatk/gatk SelectVariants -V test2HG003_GRCh38_1_22_v4.2_benchmark.vcf.gz.normalized.vcf -O test2.vcf --sites-only-vcf-output

tmpBam
sort.out
})

val countVariants = new CountVariants(vbidResource, out.resolve(prefix + ".vbid.count"))
Expand Down Expand Up @@ -185,12 +185,29 @@ private class CopyPS_FromBedToVcf(val in: PathToVcf,
"-a" :: pathToBed ::
"-h" :: dpHeader ::
"-c" :: "CHROM,FROM,TO,pl" ::
"-x" :: "FORMAT/PS" :: // remove the PS field since it's invalid
"-x" :: "FORMAT" :: // remove the FORMAT Field
"-x" :: "^INFO/pl" :: // no need for all these annotations anyway
"--force" :: // needed since the input file is corrupt.
"-o" :: out ::
in :: Nil
}


// copies the value from the bed file to the vcf
// removed the PS field (since it's invalid)
private class subsetToPL(val in: PathToVcf, val out: Path
) extends ProcessTask with FixedResources with Configuration {
requires(Cores(1), Memory("1G"))

private val bcftools: Path = configureExecutable("bcftools.executable", "bcftools")

override def args: Seq[Any] = bcftools :: "view" ::
"-i" :: "pl!=\".\"" ::
"-o" :: out ::
in :: Nil
}


// copies the value from the bed file to the vcf
private class MakePLBed(val table: Path,
val out: Path
Expand Down
5 changes: 3 additions & 2 deletions tasks/src/main/scala/dagr/tasks/bwa/BwaMem.scala
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ import DagrDef.{PathToBam, PathToFasta, PathToFastq}
import scala.collection.mutable.ListBuffer

class BwaMem(fastq: PathToFastq = Io.StdIn,
out: Option[PathToBam] = None,
val out: Option[PathToBam] = None,
ref: PathToFasta,
minSeedLength: Option[Int] = None,
matchScore: Option[Int] = None,
Expand Down Expand Up @@ -65,7 +65,8 @@ class BwaMem(fastq: PathToFastq = Io.StdIn,
clippingPenalties.foreach { case (five, three) => buffer.append("-L", s"$five,$three") }
minScore.foreach(s => buffer.append("-T", s))

buffer.append(ref, fastq, out.map(f => "> " + f))
buffer.append(ref, fastq)
out.foreach(f => buffer.append( "> " + f))
buffer.toList
}
}

0 comments on commit 90e526b

Please sign in to comment.