diff --git a/CHANGES.md b/CHANGES.md
index ed93a2a..ee08a2a 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -1,3 +1,6 @@
+### 0.4.0
+* Added detectExtremeDepth code. Replaces functionality in PCAP-core detectExtremeDepth.pl
+
### 0.3.1
* Changes HTSlib to 1.3.2 (bringing all stack in line)
* Changed libBigWig archive get to use release rather than commit id (actually the same but harder to tell which release it is)
diff --git a/VERSION.txt b/VERSION.txt
index 9e11b32..1d0ba9e 100644
--- a/VERSION.txt
+++ b/VERSION.txt
@@ -1 +1 @@
-0.3.1
+0.4.0
diff --git a/c/Makefile b/c/Makefile
index 1d04663..6eb8058 100644
--- a/c/Makefile
+++ b/c/Makefile
@@ -71,6 +71,7 @@ BAM2BW_TARGET=../bin/bam2bw
BAM2BASES_TARGET=../bin/bam2bwbases
CAT_TARGET=../bin/bwcat
BG2BW_TARGET=../bin/bg2bw
+DEXDEPTH_TARGET=../bin/detectExtremeDepth
make_BW=../bin/makebw
@@ -84,8 +85,8 @@ make_BW=../bin/makebw
.NOTPARALLEL: test
-all: clean pre make_htslib_tmp $(BAM2BG_TARGET) $(BAM2BW_TARGET) $(BAM2BASES_TARGET) remove_htslib_tmp $(JOIN_TARGET) $(CAT_TARGET) $(BG2BW_TARGET) test
- @echo bwcat, bwjoin, bam2bedgraph, bam2bw and bam2bwbases compiled.
+all: clean pre make_htslib_tmp $(BAM2BG_TARGET) $(BAM2BW_TARGET) $(BAM2BASES_TARGET) remove_htslib_tmp $(JOIN_TARGET) $(CAT_TARGET) $(BG2BW_TARGET) $(DEXDEPTH_TARGET) test
+ @echo bwcat, bwjoin, bam2bedgraph, bam2bw, detectExtremeDepth and bam2bwbases compiled.
$(CAT_TARGET): $(OBJS)
$(CC) $(JOIN_INCLUDES) $(INCLUDES) $(CFLAGS) ./catbw.c $(OBJS) $(LFLAGS) $(CAT_LFLAGS) $(LIBS) $(LIBBWLIBS) -o $(CAT_TARGET)
@@ -105,13 +106,16 @@ $(BAM2BG_TARGET): $(OBJS)
$(BG2BW_TARGET): $(OBJS)
$(CC) $(JOIN_INCLUDES) $(INCLUDES) $(CFLAGS) ./bg2bw.c $(OBJS) $(LFLAGS) $(CAT_LFLAGS) $(LIBS) $(LIBBWLIBS) -o $(BG2BW_TARGET)
+$(DEXDEPTH_TARGET): $(OBJS)
+ $(CC) $(JOIN_INCLUDES) $(INCLUDES) $(CFLAGS) ./detectExtremeDepth.c $(OBJS) $(LFLAGS) $(CAT_LFLAGS) $(LIBS) $(LIBBWLIBS) -o $(DEXDEPTH_TARGET)
+
pre:
mkdir ../bin
#Unit Tests
-test: $(BAM2BG_TARGET) $(BAM2BW_TARGET) $(BAM2BASES_TARGET) $(JOIN_TARGET) $(CAT_TARGET) $(BG2BW_TARGET)
+test: $(BAM2BG_TARGET) $(BAM2BW_TARGET) $(BAM2BASES_TARGET) $(DEXDEPTH_TARGET) $(JOIN_TARGET) $(CAT_TARGET) $(BG2BW_TARGET)
test: CFLAGS += $(JOIN_INCLUDES) $(INCLUDES) -I./ $(OBJS) $(LFLAGS) $(LIBS) $(CAT_LFLAGS)
test: $(TESTS)
sh ./c_tests/runtests.sh
@@ -138,6 +142,9 @@ make_bwjoin: $(JOIN_TARGET)
make_bg2bw: $(BG2BW_TARGET)
@echo $(BG2BW_TARGET) done
+make_detectExtremeDepth: $(DEXDEPTH_TARGET)
+ @echo $(DEXDEPTH_TARGET) done
+
make_htslib_tmp:
$(MD) $(HTSTMP)
#Do some magic to ensure we compile with the static libhts.a rather than libhts.so
@@ -149,7 +156,7 @@ remove_htslib_tmp:
copyscript:
cp ./scripts/* ./bin/
- chmod a+x $(BAM2BG_TARGET) $(BAM2BW_TARGET) $(BAM2BASES_TARGET) $(JOIN_TARGET) $(CAT_TARGET)
+ chmod a+x $(BAM2BG_TARGET) $(BAM2BW_TARGET) $(BAM2BASES_TARGET) $(JOIN_TARGET) $(CAT_TARGET) $(DEXDEPTH_TARGET)
valgrind:
VALGRIND="valgrind --log-file=/tmp/valgrind-%p.log" $(MAKE)
@@ -164,7 +171,7 @@ valgrind:
clean:
@echo clean
- $(RM) ./*.o *~ $(BAM2BG_TARGET) $(BAM2BW_TARGET) $(BG2BW_TARGET) $(BAM2BASES_TARGET) $(JOIN_TARGET) $(CAT_TARGET) ./tests/tests_log $(TESTS) ./*.gcda ./*.gcov ./*.gcno *.gcda *.gcov *.gcno ./c_tests/*.gcda ./c_tests/*.gcov ./c_tests/*.gcno
+ $(RM) ./*.o *~ $(BAM2BG_TARGET) $(BAM2BW_TARGET) $(BG2BW_TARGET) $(BAM2BASES_TARGET) $(JOIN_TARGET) $(DEXDEPTH_TARGET) $(CAT_TARGET) ./tests/tests_log $(TESTS) ./*.gcda ./*.gcov ./*.gcno *.gcda *.gcov *.gcno ./c_tests/*.gcda ./c_tests/*.gcov ./c_tests/*.gcno
-rm -rf $(HTSTMP) ../bin
depend: $(SRCS)
diff --git a/c/detectExtremeDepth.c b/c/detectExtremeDepth.c
new file mode 100644
index 0000000..e9bd1d8
--- /dev/null
+++ b/c/detectExtremeDepth.c
@@ -0,0 +1,226 @@
+/** LICENSE
+* Copyright (c) 2017 Genome Research Ltd.
+*
+* Author: Cancer Genome Project cgpit@sanger.ac.uk
+*
+* This file is part of cgpBigWig.
+*
+* cgpBigWig is free software: you can redistribute it and/or modify it under
+* the terms of the GNU Affero General Public License as published by the Free
+* Software Foundation; either version 3 of the License, or (at your option) any
+* later version.
+*
+* This program is distributed in the hope that it will be useful, but WITHOUT
+* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+* FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
+* details.
+*
+* You should have received a copy of the GNU Affero General Public License
+* along with this program. If not, see .
+*
+* 1. The usage of a range of years within a copyright statement contained within
+* this distribution should be interpreted as being equivalent to a list of years
+* including the first and last year specified and all consecutive years between
+* them. For example, a copyright statement that reads ‘Copyright (c) 2005, 2007-
+* 2009, 2011-2012’ should be interpreted as being identical to a statement that
+* reads ‘Copyright (c) 2005, 2007, 2008, 2009, 2011, 2012’ and a copyright
+* statement that reads ‘Copyright (c) 2005-2012’ should be interpreted as being
+* identical to a statement that reads ‘Copyright (c) 2005, 2006, 2007, 2008,
+* 2009, 2010, 2011, 2012’."
+*
+*/
+
+#include
+#include
+#include
+#include
+#include "bigWig.h"
+#include "utils.h"
+
+char *outdir = NULL;
+char *bigwig = NULL;
+char *ref = NULL;
+char *decode = NULL;
+int sd = 12;
+
+void print_usage (int exit_code){
+ printf("Usage: detectExtremeDepth - Generate profile of BigWig file and identify regions outside the normal range\n\n");
+ printf("-b --bigwig [file] BigWig file path.\n");
+ printf("-o --output [dir] Folder to send output to\n");
+ printf(" - named as input file with '.tab' extension\n\n");
+ printf("Optional:\n");
+ printf("-r --ref [str] Restrict to this reference (mainly for testing)\n");
+ printf(" - without 'chr' prefix, will test with and without the 'chr' for you.\n");
+ printf(" - if '-r' defined '.{val}' will prefix '.bed'\n");
+ printf("-s --sd [int] Number of standard deviations above mean for group to be included [%d].\n",sd);
+ printf("-d --decode [str] Decode -r to chromosome names (do not include 'chr')\n");
+ printf(" e.g. -d 23:X -d 24:Y -d 25:MT\n\n");
+ printf("Other:\n");
+ printf("-h --help Display this usage information.\n");
+ printf("-v --version Prints the version number.\n\n");
+
+ exit(exit_code);
+}
+
+void setup_options(int argc, char *argv[]){
+ const struct option long_opts[] =
+ {
+ {"bigwig", required_argument, 0, 'b'},
+ {"ref", required_argument, 0, 'r'},
+ {"output",required_argument,0,'o'},
+ {"decode",required_argument,0,'d'},
+ {"sd",required_argument,0,'s'},
+ {"help", no_argument, 0, 'h'},
+ {"version", no_argument, 0, 'v'},
+ { NULL, 0, NULL, 0}
+ }; //End of declaring opts
+
+ int index = 0;
+ int iarg = 0;
+
+ //Iterate through options
+ while((iarg = getopt_long(argc, argv, "r:s:o:b:d:hv",long_opts, &index)) != -1){
+ switch(iarg){
+ case 'b':
+ bigwig = optarg;
+ break;
+ case 'r':
+ ref = optarg;
+ break;
+ case 'o':
+ outdir = optarg;
+ break;
+ case 'h':
+ print_usage (0);
+ break;
+ case 'v':
+ print_version (0);
+ break;
+ case 'd':
+ decode = optarg;
+ break;
+ case 's':
+ if(sscanf(optarg, "%i", &sd) != 1){
+ fprintf(stderr,"Error parsing -s|--sd argument '%s'. Should be an integer > 0",optarg);
+ print_usage(1);
+ }
+ break;
+ case '?':
+ print_usage (1);
+ break;
+ default:
+ print_usage (0);
+ }; // End of args switch statement
+
+ }//End of iteration through options
+
+ //Do some checking to ensure required arguments were passed and are accessible files
+ if(outdir == NULL){
+ fprintf(stderr,"Output directory -o|output should be defined.\n");
+ print_usage(1);
+ }
+
+ if(check_exist(bigwig)!= 1){
+ fprintf(stderr,"Bigwig file %s does not appear to exist.\n",ref);
+ print_usage(1);
+ }
+}
+
+char *find_last_dot(char* str){
+ char *ptr = str;
+ char *dot = NULL;
+ while(*ptr++){
+ if (*ptr == '.'){
+ dot = ptr;
+ }
+ }
+ return dot;
+}
+
+int main(int argc, char *argv[]){
+ setup_options(argc, argv);
+ bigWigFile_t *fp = NULL;
+ bwOverlappingIntervals_t *intervals = NULL;
+ char *newfname = NULL;
+ FILE *out = NULL;
+ //Initialize enough space to hold 128KiB (1<<17) of data at a time
+ if(bwInit(1<<17) != 0) {
+ fprintf(stderr, "Received an error in bwInit\n");
+ return 1;
+ }
+
+ //Open the local/remote file
+ fp = bwOpen(bigwig, NULL, "r");
+ if(!fp) {
+ fprintf(stderr, "Error opening %s\n", bigwig);
+ return 1;
+ }
+
+ //Open the output file.
+ char *filename = basename(bigwig);
+ newfname = malloc(sizeof(char) * (strlen(filename)+2));
+ if(!newfname){
+ fprintf(stderr,"Error assigning memory for new filename.");
+ goto error;
+ }
+ strcpy(newfname, filename);
+ char *lastdot = find_last_dot(newfname);
+ strcpy(lastdot,".bed");
+
+ //Add output directory to new filename
+ char *outputloc = malloc(sizeof(char) * (strlen(outdir) + strlen(newfname) + 2));
+ strcpy(outputloc,outdir);
+ strcat(outputloc,"/");
+ strcat(outputloc,newfname);
+
+ out = fopen(outputloc, "w");
+ if(!out){
+ fprintf(stderr,"Error opening output file %s.\n",outputloc);
+ goto error;
+ }
+ //Iterate through each chromosome in this file
+ int64_t i=0;
+ for(i=0;icl->nKeys;i++){
+ double *mn = bwStats(fp,fp->cl->chrom[i],0, fp->cl->len[i],1,mean);
+ double *stdv = bwStats(fp,fp->cl->chrom[i],0, fp->cl->len[i],1,stdev);
+
+ fprintf(stderr,"%s: mean %.2f, stdev %.2f\n",fp->cl->chrom[i],mn[0],stdv[0]);
+ intervals = bwGetOverlappingIntervals(fp, fp->cl->chrom[i], 0, fp->cl->len[i]);
+ if(!intervals){
+ fprintf(stderr,"Error retrieving intervals for %s:%d-%d\n",fp->cl->chrom[i], 0, fp->cl->len[i]);
+ goto error;
+ }
+ if(intervals->l){
+ double max_val = mn[0] + (stdv[0] * sd);
+ fprintf(stderr,"%s: Max depth permitted = %.2f\n", fp->cl->chrom[i], max_val);
+ uint64_t k=0;
+ for(k=0;kl;k++){
+ if(intervals->value[k] > max_val){//If this exceeds our limit
+ //Write to output file
+ int writ = fprintf(out,"%s\t%d\t%d\t%.0f\n",fp->cl->chrom[i],intervals->start[k],intervals->end[k],intervals->value[k]);
+ if(!writ){
+ fprintf(stderr,"Error writing extreme depth interval to output file %s:%d-%d\n",fp->cl->chrom[i], 0, fp->cl->len[i]);
+ goto error;
+ }
+ }
+ }
+ }//End of iteration through intervals in this chr
+ bwDestroyOverlappingIntervals(intervals);
+ }
+
+ bwClose(fp);
+ bwCleanup();
+ fflush(out);
+ fclose(out);
+ free(newfname);
+ return 0;
+
+error:
+ if(out) fclose(out);
+ if(newfname) free(newfname);
+ if(intervals) bwDestroyOverlappingIntervals(intervals);
+ if(fp) bwClose(fp);
+ bwCleanup();
+ return 1;
+
+}
diff --git a/setup.sh b/setup.sh
index 7a4c59b..30ea77e 100755
--- a/setup.sh
+++ b/setup.sh
@@ -163,6 +163,7 @@ else
cp bin/bwcat $INST_PATH/bin/.
cp bin/bam2bwbases $INST_PATH/bin/.
cp bin/bg2bw $INST_PATH/bin/.
+ cp bin/detectExtremeDepth $INST_PATH/bin/.
touch $SETUP_DIR/cgpBigWig.success
# need to clean up as will clash with other version
make -C c clean