diff --git a/CHANGES.md b/CHANGES.md index ed93a2a..ee08a2a 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,3 +1,6 @@ +### 0.4.0 +* Added detectExtremeDepth code. Replaces functionality in PCAP-core detectExtremeDepth.pl + ### 0.3.1 * Changes HTSlib to 1.3.2 (bringing all stack in line) * Changed libBigWig archive get to use release rather than commit id (actually the same but harder to tell which release it is) diff --git a/VERSION.txt b/VERSION.txt index 9e11b32..1d0ba9e 100644 --- a/VERSION.txt +++ b/VERSION.txt @@ -1 +1 @@ -0.3.1 +0.4.0 diff --git a/c/Makefile b/c/Makefile index 1d04663..6eb8058 100644 --- a/c/Makefile +++ b/c/Makefile @@ -71,6 +71,7 @@ BAM2BW_TARGET=../bin/bam2bw BAM2BASES_TARGET=../bin/bam2bwbases CAT_TARGET=../bin/bwcat BG2BW_TARGET=../bin/bg2bw +DEXDEPTH_TARGET=../bin/detectExtremeDepth make_BW=../bin/makebw @@ -84,8 +85,8 @@ make_BW=../bin/makebw .NOTPARALLEL: test -all: clean pre make_htslib_tmp $(BAM2BG_TARGET) $(BAM2BW_TARGET) $(BAM2BASES_TARGET) remove_htslib_tmp $(JOIN_TARGET) $(CAT_TARGET) $(BG2BW_TARGET) test - @echo bwcat, bwjoin, bam2bedgraph, bam2bw and bam2bwbases compiled. +all: clean pre make_htslib_tmp $(BAM2BG_TARGET) $(BAM2BW_TARGET) $(BAM2BASES_TARGET) remove_htslib_tmp $(JOIN_TARGET) $(CAT_TARGET) $(BG2BW_TARGET) $(DEXDEPTH_TARGET) test + @echo bwcat, bwjoin, bam2bedgraph, bam2bw, detectExtremeDepth and bam2bwbases compiled. $(CAT_TARGET): $(OBJS) $(CC) $(JOIN_INCLUDES) $(INCLUDES) $(CFLAGS) ./catbw.c $(OBJS) $(LFLAGS) $(CAT_LFLAGS) $(LIBS) $(LIBBWLIBS) -o $(CAT_TARGET) @@ -105,13 +106,16 @@ $(BAM2BG_TARGET): $(OBJS) $(BG2BW_TARGET): $(OBJS) $(CC) $(JOIN_INCLUDES) $(INCLUDES) $(CFLAGS) ./bg2bw.c $(OBJS) $(LFLAGS) $(CAT_LFLAGS) $(LIBS) $(LIBBWLIBS) -o $(BG2BW_TARGET) +$(DEXDEPTH_TARGET): $(OBJS) + $(CC) $(JOIN_INCLUDES) $(INCLUDES) $(CFLAGS) ./detectExtremeDepth.c $(OBJS) $(LFLAGS) $(CAT_LFLAGS) $(LIBS) $(LIBBWLIBS) -o $(DEXDEPTH_TARGET) + pre: mkdir ../bin #Unit Tests -test: $(BAM2BG_TARGET) $(BAM2BW_TARGET) $(BAM2BASES_TARGET) $(JOIN_TARGET) $(CAT_TARGET) $(BG2BW_TARGET) +test: $(BAM2BG_TARGET) $(BAM2BW_TARGET) $(BAM2BASES_TARGET) $(DEXDEPTH_TARGET) $(JOIN_TARGET) $(CAT_TARGET) $(BG2BW_TARGET) test: CFLAGS += $(JOIN_INCLUDES) $(INCLUDES) -I./ $(OBJS) $(LFLAGS) $(LIBS) $(CAT_LFLAGS) test: $(TESTS) sh ./c_tests/runtests.sh @@ -138,6 +142,9 @@ make_bwjoin: $(JOIN_TARGET) make_bg2bw: $(BG2BW_TARGET) @echo $(BG2BW_TARGET) done +make_detectExtremeDepth: $(DEXDEPTH_TARGET) + @echo $(DEXDEPTH_TARGET) done + make_htslib_tmp: $(MD) $(HTSTMP) #Do some magic to ensure we compile with the static libhts.a rather than libhts.so @@ -149,7 +156,7 @@ remove_htslib_tmp: copyscript: cp ./scripts/* ./bin/ - chmod a+x $(BAM2BG_TARGET) $(BAM2BW_TARGET) $(BAM2BASES_TARGET) $(JOIN_TARGET) $(CAT_TARGET) + chmod a+x $(BAM2BG_TARGET) $(BAM2BW_TARGET) $(BAM2BASES_TARGET) $(JOIN_TARGET) $(CAT_TARGET) $(DEXDEPTH_TARGET) valgrind: VALGRIND="valgrind --log-file=/tmp/valgrind-%p.log" $(MAKE) @@ -164,7 +171,7 @@ valgrind: clean: @echo clean - $(RM) ./*.o *~ $(BAM2BG_TARGET) $(BAM2BW_TARGET) $(BG2BW_TARGET) $(BAM2BASES_TARGET) $(JOIN_TARGET) $(CAT_TARGET) ./tests/tests_log $(TESTS) ./*.gcda ./*.gcov ./*.gcno *.gcda *.gcov *.gcno ./c_tests/*.gcda ./c_tests/*.gcov ./c_tests/*.gcno + $(RM) ./*.o *~ $(BAM2BG_TARGET) $(BAM2BW_TARGET) $(BG2BW_TARGET) $(BAM2BASES_TARGET) $(JOIN_TARGET) $(DEXDEPTH_TARGET) $(CAT_TARGET) ./tests/tests_log $(TESTS) ./*.gcda ./*.gcov ./*.gcno *.gcda *.gcov *.gcno ./c_tests/*.gcda ./c_tests/*.gcov ./c_tests/*.gcno -rm -rf $(HTSTMP) ../bin depend: $(SRCS) diff --git a/c/detectExtremeDepth.c b/c/detectExtremeDepth.c new file mode 100644 index 0000000..e9bd1d8 --- /dev/null +++ b/c/detectExtremeDepth.c @@ -0,0 +1,226 @@ +/** LICENSE +* Copyright (c) 2017 Genome Research Ltd. +* +* Author: Cancer Genome Project cgpit@sanger.ac.uk +* +* This file is part of cgpBigWig. +* +* cgpBigWig is free software: you can redistribute it and/or modify it under +* the terms of the GNU Affero General Public License as published by the Free +* Software Foundation; either version 3 of the License, or (at your option) any +* later version. +* +* This program is distributed in the hope that it will be useful, but WITHOUT +* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +* FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more +* details. +* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see . +* +* 1. The usage of a range of years within a copyright statement contained within +* this distribution should be interpreted as being equivalent to a list of years +* including the first and last year specified and all consecutive years between +* them. For example, a copyright statement that reads ‘Copyright (c) 2005, 2007- +* 2009, 2011-2012’ should be interpreted as being identical to a statement that +* reads ‘Copyright (c) 2005, 2007, 2008, 2009, 2011, 2012’ and a copyright +* statement that reads ‘Copyright (c) 2005-2012’ should be interpreted as being +* identical to a statement that reads ‘Copyright (c) 2005, 2006, 2007, 2008, +* 2009, 2010, 2011, 2012’." +* +*/ + +#include +#include +#include +#include +#include "bigWig.h" +#include "utils.h" + +char *outdir = NULL; +char *bigwig = NULL; +char *ref = NULL; +char *decode = NULL; +int sd = 12; + +void print_usage (int exit_code){ + printf("Usage: detectExtremeDepth - Generate profile of BigWig file and identify regions outside the normal range\n\n"); + printf("-b --bigwig [file] BigWig file path.\n"); + printf("-o --output [dir] Folder to send output to\n"); + printf(" - named as input file with '.tab' extension\n\n"); + printf("Optional:\n"); + printf("-r --ref [str] Restrict to this reference (mainly for testing)\n"); + printf(" - without 'chr' prefix, will test with and without the 'chr' for you.\n"); + printf(" - if '-r' defined '.{val}' will prefix '.bed'\n"); + printf("-s --sd [int] Number of standard deviations above mean for group to be included [%d].\n",sd); + printf("-d --decode [str] Decode -r to chromosome names (do not include 'chr')\n"); + printf(" e.g. -d 23:X -d 24:Y -d 25:MT\n\n"); + printf("Other:\n"); + printf("-h --help Display this usage information.\n"); + printf("-v --version Prints the version number.\n\n"); + + exit(exit_code); +} + +void setup_options(int argc, char *argv[]){ + const struct option long_opts[] = + { + {"bigwig", required_argument, 0, 'b'}, + {"ref", required_argument, 0, 'r'}, + {"output",required_argument,0,'o'}, + {"decode",required_argument,0,'d'}, + {"sd",required_argument,0,'s'}, + {"help", no_argument, 0, 'h'}, + {"version", no_argument, 0, 'v'}, + { NULL, 0, NULL, 0} + }; //End of declaring opts + + int index = 0; + int iarg = 0; + + //Iterate through options + while((iarg = getopt_long(argc, argv, "r:s:o:b:d:hv",long_opts, &index)) != -1){ + switch(iarg){ + case 'b': + bigwig = optarg; + break; + case 'r': + ref = optarg; + break; + case 'o': + outdir = optarg; + break; + case 'h': + print_usage (0); + break; + case 'v': + print_version (0); + break; + case 'd': + decode = optarg; + break; + case 's': + if(sscanf(optarg, "%i", &sd) != 1){ + fprintf(stderr,"Error parsing -s|--sd argument '%s'. Should be an integer > 0",optarg); + print_usage(1); + } + break; + case '?': + print_usage (1); + break; + default: + print_usage (0); + }; // End of args switch statement + + }//End of iteration through options + + //Do some checking to ensure required arguments were passed and are accessible files + if(outdir == NULL){ + fprintf(stderr,"Output directory -o|output should be defined.\n"); + print_usage(1); + } + + if(check_exist(bigwig)!= 1){ + fprintf(stderr,"Bigwig file %s does not appear to exist.\n",ref); + print_usage(1); + } +} + +char *find_last_dot(char* str){ + char *ptr = str; + char *dot = NULL; + while(*ptr++){ + if (*ptr == '.'){ + dot = ptr; + } + } + return dot; +} + +int main(int argc, char *argv[]){ + setup_options(argc, argv); + bigWigFile_t *fp = NULL; + bwOverlappingIntervals_t *intervals = NULL; + char *newfname = NULL; + FILE *out = NULL; + //Initialize enough space to hold 128KiB (1<<17) of data at a time + if(bwInit(1<<17) != 0) { + fprintf(stderr, "Received an error in bwInit\n"); + return 1; + } + + //Open the local/remote file + fp = bwOpen(bigwig, NULL, "r"); + if(!fp) { + fprintf(stderr, "Error opening %s\n", bigwig); + return 1; + } + + //Open the output file. + char *filename = basename(bigwig); + newfname = malloc(sizeof(char) * (strlen(filename)+2)); + if(!newfname){ + fprintf(stderr,"Error assigning memory for new filename."); + goto error; + } + strcpy(newfname, filename); + char *lastdot = find_last_dot(newfname); + strcpy(lastdot,".bed"); + + //Add output directory to new filename + char *outputloc = malloc(sizeof(char) * (strlen(outdir) + strlen(newfname) + 2)); + strcpy(outputloc,outdir); + strcat(outputloc,"/"); + strcat(outputloc,newfname); + + out = fopen(outputloc, "w"); + if(!out){ + fprintf(stderr,"Error opening output file %s.\n",outputloc); + goto error; + } + //Iterate through each chromosome in this file + int64_t i=0; + for(i=0;icl->nKeys;i++){ + double *mn = bwStats(fp,fp->cl->chrom[i],0, fp->cl->len[i],1,mean); + double *stdv = bwStats(fp,fp->cl->chrom[i],0, fp->cl->len[i],1,stdev); + + fprintf(stderr,"%s: mean %.2f, stdev %.2f\n",fp->cl->chrom[i],mn[0],stdv[0]); + intervals = bwGetOverlappingIntervals(fp, fp->cl->chrom[i], 0, fp->cl->len[i]); + if(!intervals){ + fprintf(stderr,"Error retrieving intervals for %s:%d-%d\n",fp->cl->chrom[i], 0, fp->cl->len[i]); + goto error; + } + if(intervals->l){ + double max_val = mn[0] + (stdv[0] * sd); + fprintf(stderr,"%s: Max depth permitted = %.2f\n", fp->cl->chrom[i], max_val); + uint64_t k=0; + for(k=0;kl;k++){ + if(intervals->value[k] > max_val){//If this exceeds our limit + //Write to output file + int writ = fprintf(out,"%s\t%d\t%d\t%.0f\n",fp->cl->chrom[i],intervals->start[k],intervals->end[k],intervals->value[k]); + if(!writ){ + fprintf(stderr,"Error writing extreme depth interval to output file %s:%d-%d\n",fp->cl->chrom[i], 0, fp->cl->len[i]); + goto error; + } + } + } + }//End of iteration through intervals in this chr + bwDestroyOverlappingIntervals(intervals); + } + + bwClose(fp); + bwCleanup(); + fflush(out); + fclose(out); + free(newfname); + return 0; + +error: + if(out) fclose(out); + if(newfname) free(newfname); + if(intervals) bwDestroyOverlappingIntervals(intervals); + if(fp) bwClose(fp); + bwCleanup(); + return 1; + +} diff --git a/setup.sh b/setup.sh index 7a4c59b..30ea77e 100755 --- a/setup.sh +++ b/setup.sh @@ -163,6 +163,7 @@ else cp bin/bwcat $INST_PATH/bin/. cp bin/bam2bwbases $INST_PATH/bin/. cp bin/bg2bw $INST_PATH/bin/. + cp bin/detectExtremeDepth $INST_PATH/bin/. touch $SETUP_DIR/cgpBigWig.success # need to clean up as will clash with other version make -C c clean