Skip to content

Commit

Permalink
Merge pull request #83 from cgat-developers/AC_insert_size
Browse files Browse the repository at this point in the history
Ac insert size
  • Loading branch information
Adam Cribbs authored Dec 16, 2018
2 parents dddef8f + 7f47d28 commit 06f35b7
Show file tree
Hide file tree
Showing 2 changed files with 46 additions and 1 deletion.
19 changes: 19 additions & 0 deletions cgatpipelines/tasks/bamstats.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ def buildPicardInsertSizeStats(infile, outfile, genome_file,
INPUT=%(infile)s
REFERENCE_SEQUENCE=%(genome_file)s
ASSUME_SORTED=true
HISTOGRAM_FILE=%(outfile)s.pdf
OUTPUT=%(outfile)s
VALIDATION_STRINGENCY=SILENT
>& %(outfile)s'''
Expand Down Expand Up @@ -110,6 +111,24 @@ def addPseudoSequenceQuality(infile, outfile):
P.run(statement)


def mergeInsertSize(infiles, outfile):
    '''Merge per-sample Picard insert size metrics into a single table.

    One header row is written to `outfile`, followed by one row per
    input file.  Each row is the sample name followed by the metrics
    row taken from line 8 of that input file.

    Arguments
    ---------
    infiles : list
        Filenames of per-sample insert size metrics files (``.insert_stats``).
    outfile : str
        Filename of the merged, tab-separated output table.

    Raises
    ------
    IndexError
        If an input file has fewer than 8 lines, i.e. it contains no
        metrics row.
    '''

    # Column order follows Picard's InsertSizeMetrics class.  The
    # WIDTH_OF_20/30/40_PERCENT columns are part of that output and must
    # be listed, otherwise every later column is labelled incorrectly.
    columns = (
        "SAMPLE_NAME",
        "MEDIAN_INSERT_SIZE",
        "MODE_INSERT_SIZE",
        "MEDIAN_ABSOLUTE_DEVIATION",
        "MIN_INSERT_SIZE",
        "MAX_INSERT_SIZE",
        "MEAN_INSERT_SIZE",
        "STANDARD_DEVIATION",
        "READ_PAIRS",
        "PAIR_ORIENTATION",
        "WIDTH_OF_10_PERCENT",
        "WIDTH_OF_20_PERCENT",
        "WIDTH_OF_30_PERCENT",
        "WIDTH_OF_40_PERCENT",
        "WIDTH_OF_50_PERCENT",
        "WIDTH_OF_60_PERCENT",
        "WIDTH_OF_70_PERCENT",
        "WIDTH_OF_80_PERCENT",
        "WIDTH_OF_90_PERCENT",
        "WIDTH_OF_95_PERCENT",
        "WIDTH_OF_99_PERCENT",
        "SAMPLE",
        "LIBRARY",
        "READ_GROUP",
    )

    # 0-based index of the metrics row within a metrics file.
    metrics_row = 7

    # Context managers make sure every handle is closed, also on error.
    with iotools.open_file(outfile, "w") as out:
        out.write("\t".join(columns) + "\n")

        for infile in infiles:
            name = infile.replace(".insert_stats", "")
            name = name.replace("Picard_stats.dir/", "")

            with iotools.open_file(infile) as inf:
                lines = inf.readlines()

            if len(lines) <= metrics_row:
                raise IndexError(
                    "%s contains no insert size metrics row "
                    "(expected on line %i)" % (infile, metrics_row + 1))

            # Only remove the line terminator: SAMPLE, LIBRARY and
            # READ_GROUP may be empty, and stripping all whitespace
            # would drop those trailing fields from the row.
            metrics = lines[metrics_row].rstrip("\r\n").split("\t")
            out.write("%s\t%s\n" % (name, "\t".join(metrics)))

def copyBamFile(infile, outfile):
'''Make softlinks of the bam files
Expand Down
28 changes: 27 additions & 1 deletion cgatpipelines/tools/pipeline_bamstats.py
Original file line number Diff line number Diff line change
Expand Up @@ -304,6 +304,25 @@ def buildPicardStats(infiles, outfile):
PICARD_MEMORY)


@P.add_doc(bamstats.buildPicardInsertSizeStats)
@transform(intBam,
           regex("BamFiles.dir/(.*).bam$"),
           add_inputs(os.path.join(PARAMS["genome_dir"],
                                   PARAMS["genome"] + ".fa")),
           r"Picard_stats.dir/\1.insert_stats")
def buildPicardInserts(infiles, outfile):
    ''' build Picard insert size stats for a single bam file '''
    bam, genome_fasta = infiles

    # Inputs whose path contains "transcriptome.dir" are run against
    # "refcoding.fa" instead of the genome fasta supplied by the pipeline.
    reference = ("refcoding.fa" if "transcriptome.dir" in bam
                 else genome_fasta)

    bamstats.buildPicardInsertSizeStats(bam,
                                        outfile,
                                        reference,
                                        PICARD_MEMORY)


@P.add_doc(bamstats.buildPicardDuplicationStats)
@transform(intBam,
regex("BamFiles.dir/(.*).bam$"),
Expand Down Expand Up @@ -759,6 +778,12 @@ def loadTranscriptProfile(infiles, outfile):
bamstats.loadTranscriptProfile(infiles, outfile)


@merge(buildPicardInserts, "picard_insert_metrics.csv")
def mergePicardInsertMetrics(infiles, outfile):
    '''Combine the outputs of buildPicardInserts into one table.

    The work is delegated to bamstats.mergeInsertSize, which writes
    the merged table to `outfile`.
    '''
    bamstats.mergeInsertSize(infiles, outfile)


@P.add_doc(bamstats.loadStrandSpecificity)
@jobs_limit(PARAMS.get("jobs_limit_db", 1), "db")
@follows(loadTranscriptProfile)
Expand Down Expand Up @@ -812,7 +837,8 @@ def views():
loadExonValidation,
loadPicardRnaSeqMetrics,
loadTranscriptProfile,
loadStrandSpecificity)
loadStrandSpecificity,
mergePicardInsertMetrics)
def full():
'''a dummy task to run all tasks in the pipeline'''
pass
Expand Down

0 comments on commit 06f35b7

Please sign in to comment.