From efeedb2108bf3a93c915a9ab53c1a963363c4b72 Mon Sep 17 00:00:00 2001
From: David Jones
Date: Thu, 15 Sep 2016 15:58:04 +0100
Subject: [PATCH 1/3] Added bedgraph to bw functionality

---
 c/Makefile               |  16 ++-
 c/bg2bw.c                | 255 +++++++++++++++++++++++++++++++++++++++
 c/c_tests/test_5bg2bw.sh |  73 ++++++++++++++
 setup.sh                 |   1 +
 4 files changed, 341 insertions(+), 4 deletions(-)
 create mode 100644 c/bg2bw.c
 create mode 100755 c/c_tests/test_5bg2bw.sh

diff --git a/c/Makefile b/c/Makefile
index 3c3a960..1d04663 100644
--- a/c/Makefile
+++ b/c/Makefile
@@ -70,6 +70,7 @@ BAM2BG_TARGET=../bin/bam2bedgraph
 BAM2BW_TARGET=../bin/bam2bw
 BAM2BASES_TARGET=../bin/bam2bwbases
 CAT_TARGET=../bin/bwcat
+BG2BW_TARGET=../bin/bg2bw
 
 make_BW=../bin/makebw
 
@@ -83,7 +84,7 @@ make_BW=../bin/makebw
 
 .NOTPARALLEL: test
 
-all: clean pre make_htslib_tmp $(BAM2BG_TARGET) $(BAM2BW_TARGET) $(BAM2BASES_TARGET) remove_htslib_tmp $(JOIN_TARGET) $(CAT_TARGET) test
-	@echo bwcat, bwjoin, bam2bedgraph, bam2bw and bam2bwbases compiled.
+all: clean pre make_htslib_tmp $(BAM2BG_TARGET) $(BAM2BW_TARGET) $(BAM2BASES_TARGET) remove_htslib_tmp $(JOIN_TARGET) $(CAT_TARGET) $(BG2BW_TARGET) test
+	@echo bwcat, bwjoin, bam2bedgraph, bam2bw, bam2bwbases and bg2bw compiled.
 
 $(CAT_TARGET): $(OBJS)
@@ -101,12 +102,16 @@ $(BAM2BASES_TARGET): $(OBJS)
 $(BAM2BG_TARGET): $(OBJS)
 	$(CC) $(CFLAGS) $(JOIN_INCLUDES) $(INCLUDES) ./bam2bedgraph.c $(OBJS) $(LFLAGS) $(LIBS) -o $(BAM2BG_TARGET)
 
+# bg2bw (bedgraph to bigWig); bg2bw.c is listed as a prerequisite so editing it rebuilds the binary
+$(BG2BW_TARGET): ./bg2bw.c $(OBJS)
+	$(CC) $(JOIN_INCLUDES) $(INCLUDES) $(CFLAGS) ./bg2bw.c $(OBJS) $(LFLAGS) $(CAT_LFLAGS) $(LIBS) $(LIBBWLIBS) -o $(BG2BW_TARGET)
+
 pre:
 	mkdir ../bin
 
 
 #Unit Tests
-test: $(BAM2BG_TARGET) $(BAM2BW_TARGET) $(BAM2BASES_TARGET) $(JOIN_TARGET) $(CAT_TARGET)
+test: $(BAM2BG_TARGET) $(BAM2BW_TARGET) $(BAM2BASES_TARGET) $(JOIN_TARGET) $(CAT_TARGET) $(BG2BW_TARGET)
 test: CFLAGS += $(JOIN_INCLUDES) $(INCLUDES) -I./ $(OBJS) $(LFLAGS) $(LIBS) $(CAT_LFLAGS)
 test: $(TESTS)
 	sh ./c_tests/runtests.sh
@@ -130,6 +135,9 @@ make_bwcat: $(CAT_TARGET)
 make_bwjoin: $(JOIN_TARGET)
 	@echo $(JOIN_TARGET) done
 
+make_bg2bw: $(BG2BW_TARGET)
+	@echo $(BG2BW_TARGET) done
+
 make_htslib_tmp:
 	$(MD) $(HTSTMP)
 	#Do some magic to ensure we compile with the static libhts.a rather than libhts.so
@@ -156,7 +164,7 @@ valgrind:
 
 clean:
 	@echo clean
-	$(RM) ./*.o *~ $(BAM2BG_TARGET) $(BAM2BW_TARGET) $(BAM2BASES_TARGET) $(JOIN_TARGET) $(CAT_TARGET) ./tests/tests_log $(TESTS) ./*.gcda ./*.gcov ./*.gcno *.gcda *.gcov *.gcno ./c_tests/*.gcda ./c_tests/*.gcov ./c_tests/*.gcno
+	$(RM) ./*.o *~ $(BAM2BG_TARGET) $(BAM2BW_TARGET) $(BG2BW_TARGET) $(BAM2BASES_TARGET) $(JOIN_TARGET) $(CAT_TARGET) ./tests/tests_log $(TESTS) ./*.gcda ./*.gcov ./*.gcno *.gcda *.gcov *.gcno ./c_tests/*.gcda ./c_tests/*.gcov ./c_tests/*.gcno
 	-rm -rf $(HTSTMP) ../bin
 
 depend: $(SRCS)
diff --git a/c/bg2bw.c b/c/bg2bw.c
new file mode 100644
index 0000000..8458a49
--- /dev/null
+++ b/c/bg2bw.c
@@ -0,0 +1,255 @@
+/** LICENSE
+* Copyright (c) 2016 Genome Research Ltd.
+*
+* Author: Cancer Genome Project cgpit@sanger.ac.uk
+*
+* This file is part of cgpBigWig.
+*
+* cgpBigWig is free software: you can redistribute it and/or modify it under
+* the terms of the GNU Affero General Public License as published by the Free
+* Software Foundation; either version 3 of the License, or (at your option) any
+* later version.
+*
+* This program is distributed in the hope that it will be useful, but WITHOUT
+* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+* FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
+* details.
+*
+* You should have received a copy of the GNU Affero General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*
+* 1. The usage of a range of years within a copyright statement contained within
+* this distribution should be interpreted as being equivalent to a list of years
+* including the first and last year specified and all consecutive years between
+* them. For example, a copyright statement that reads ‘Copyright (c) 2005, 2007-
+* 2009, 2011-2012’ should be interpreted as being identical to a statement that
+* reads ‘Copyright (c) 2005, 2007, 2008, 2009, 2011, 2012’ and a copyright
+* statement that reads ‘Copyright (c) 2005-2012’ should be interpreted as being
+* identical to a statement that reads ‘Copyright (c) 2005, 2006, 2007, 2008,
+* 2009, 2010, 2011, 2012’."
+*
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <inttypes.h>
+#include <getopt.h>
+#include "bigWig.h"
+#include "utils.h"
+#include "dbg.h"
+
+char *input_file = NULL;
+char *out_file = "output.bw";
+char *chrlist = NULL;
+
+/**
+ * Print the command line usage text to stdout, then terminate the process.
+ * @param exit_code status handed to exit().
+ */
+void print_usage (int exit_code){
+  printf("Usage: bg2bw -i input.bed -c chrom.list -o output.bw\n");
+  printf("bg2bw can be used to generate a bw file from a bedgraph file.\n\n");
+  printf("-i --input [file] Path to the input bedgraph (.bed) file.\n");
+  printf("-o --outfile [file] Path to the output .bw file produced. [default:'%s']\n",out_file);
+  printf("-c --chromList [file] Path to chrom.list a .tsv where first two columns are contig name and length.\n\n");
+  printf ("Other:\n");
+  printf("-h --help Display this usage information.\n");
+  printf("-v --version Prints the version number.\n\n");
+
+  exit(exit_code);
+}
+
+/**
+ * Parse the command line into the input_file, out_file and chrlist globals.
+ * Control leaves through print_usage() (which exits) for -h, for an
+ * unrecognised option, when check_exist() does not return 1 for the -i or -c
+ * path, or when one of the required options -i / -c was not given.
+ */
+void setup_options(int argc, char *argv[]){
+  const struct option long_opts[] =
+  {
+    {"input", required_argument, 0, 'i'},
+    {"outfile",required_argument, 0, 'o'},
+    {"chromList",required_argument, 0, 'c'},
+    {"help", no_argument, 0, 'h'},
+    {"version", no_argument, 0, 'v'},
+    { NULL, 0, NULL, 0}
+
+  }; //End of declaring opts
+
+  int index = 0;
+  int iarg = 0;
+
+  //Iterate through options
+  while((iarg = getopt_long(argc, argv, "c:i:o:hv",long_opts, &index)) != -1){
+    switch(iarg){
+      case 'i':
+        input_file = optarg;
+        if(check_exist(input_file) != 1){
+          fprintf(stderr,"Input bed file %s does not appear to exist.\n",input_file);
+          print_usage(1);
+        }
+        break;
+
+      case 'c':
+        chrlist = optarg;
+        if(check_exist(chrlist) != 1){
+          fprintf(stderr,"Input chrlist file %s does not appear to exist.\n",chrlist);
+          print_usage(1);
+        }
+        break;
+      case 'o':
+        out_file = optarg;
+        break;
+      case 'h':
+        print_usage (0);
+        break;
+      case 'v':
+        print_version (0);
+        break;
+      case '?':
+        print_usage (1);
+        break;
+      default:
+        print_usage (0);
+    }; // End of args switch statement
+
+  }//End of iteration through options
+
+  if(input_file==NULL){
+    fprintf(stderr,"Required option -i|--input not defined.\n");
+    print_usage(1);
+  }
+
+  if(chrlist==NULL){
+    fprintf(stderr,"Required option -c|--chromList not defined.\n");
+    print_usage(1);
+  }
+
+  return;
+}
+
+/**
+ * Read a chrom list (tab separated: contig name, contig length) and turn it
+ * into a libBigWig chromList_t.
+ *
+ * The temporary name and length arrays are freed on every path: per the
+ * libBigWig documentation bwCreateChromList() duplicates both inputs.
+ *
+ * @param chrom_list_file path of the chrom list to read.
+ * @return the new chromList_t, or NULL once any check() has failed.
+ */
+chromList_t *parse_chrom_list(char *chrom_list_file){
+  chromList_t *cl = NULL;
+  FILE *in = NULL;
+  char **contigs = NULL;
+  uint32_t *lengths = NULL;
+  char *cntg = NULL;
+  char line[2048];
+  int chr_count = 0;
+  int i = 0;
+  int ln_count = line_count(chrom_list_file);
+  //NOTE(review): assumes line_count() reports a failure as a value <= 0 - confirm in utils.c
+  check(ln_count>0,"Error no lines found in chrom list file '%s'.",chrom_list_file);
+  contigs = malloc(sizeof(char *) * ln_count);
+  check_mem(contigs);
+  lengths = malloc(sizeof(uint32_t) * ln_count);
+  check_mem(lengths);
+  in = fopen(chrom_list_file,"r");
+  check(in!=NULL,"Error opening chrom list file '%s' for reading.",chrom_list_file);
+  //Iterate through each line in the chromlist file
+  while(fgets(line, sizeof(line),in)){
+    uint32_t len;
+    int no;
+    //fgets returns an over-long line in pieces, so never write past the arrays sized from ln_count
+    check(chr_count<ln_count,"Error more contigs found than line_count %d",ln_count);
+    cntg = malloc(sizeof(line));
+    check_mem(cntg);
+    //Field width 2047 = sizeof(line)-1, so the contig name cannot overrun cntg
+    no = sscanf(line,"%2047[^\t]\t%"SCNu32"\t",cntg,&len);
+    check(no==2,"Error extracting contig name and length from line %s.",line);
+    contigs[chr_count] = cntg;
+    cntg = NULL; //Ownership has moved into contigs[]
+    lengths[chr_count] = len;
+    chr_count++;
+  }
+  check(chr_count==ln_count,"Error contigs found %d != line_count %d",chr_count,ln_count);
+  fclose(in);
+  in = NULL;
+  cl = bwCreateChromList(contigs, lengths, chr_count);
+  check(cl!=NULL,"Error creating ChromList.");
+  for(i=0;i<chr_count;i++) free(contigs[i]);
+  free(contigs);
+  free(lengths);
+  return cl;
+
+error:
+  if(in) fclose(in);
+  if(cntg) free(cntg);
+  if(contigs){
+    for(i=0;i<chr_count;i++) free(contigs[i]);
+    free(contigs);
+  }
+  if(lengths) free(lengths);
+  return NULL;
+}
+
+/**
+ * bg2bw entry point: convert the bedgraph named by -i into the bigWig named
+ * by -o, taking contig names and lengths from the -c chrom list.
+ * @return 0 on success, 1 once any step has failed.
+ */
+int main(int argc, char *argv[]){
+  setup_options(argc, argv);
+  FILE *in = NULL;
+  bigWigFile_t *fp = NULL;
+  char *ctg = NULL;
+  in = fopen(input_file,"r");
+  check(in != NULL, "Error opening input bed file '%s' to read.",input_file);
+
+  //Initialise libBigWig, then open the output file and create its header
+  int chk = bwInit(1<<17);
+  check(chk==0,"Received an error in bwInit: %d",chk);
+
+  fp = bwOpen(out_file, NULL, "w");
+  check(fp!=NULL,"Error opening output bw file '%s' for writing.",out_file);
+
+  chk = bwCreateHdr(fp, 10);
+  check(chk==0,"Error creating bw header: %d.",chk);
+
+  //Read in the chromlist
+  fp->cl = parse_chrom_list(chrlist);
+  check(fp->cl!=NULL,"Error parsing chrom list for contig information.");
+
+  chk = bwWriteHdr(fp);
+  check(chk==0,"Error writing bw header: %d.",chk);
+
+  char line[2048];
+  int num = 0;
+  ctg = malloc(sizeof(char) * 2048);
+  check_mem(ctg);
+  uint32_t start;
+  uint32_t stop;
+  float res;
+  //One bedgraph record per line: contig, start, stop, value (tab separated)
+  while(fgets(line,sizeof(line),in)){
+    //Field width 2047 = sizeof(line)-1, so the contig name cannot overrun ctg
+    num = sscanf(line,"%2047[^\t]\t%"SCNu32"\t%"SCNu32"\t%f\n",ctg,&start,&stop,&res);
+    check(num==4,"Error parsing bed line '%s' to bw format.",line);
+    chk = bwAddIntervals(fp, &ctg, &start, &stop, &res, 1);
+    check(chk==0,"Error encountered adding bed line '%s' to bw file: %d.",line,chk);
+  }
+
+  free(ctg);
+  fclose(in);
+  bwClose(fp);
+  bwCleanup();
+  return 0;
+error:
+  if(fp) bwClose(fp);
+  bwCleanup();
+  if(ctg) free(ctg);
+  if(in) fclose(in);
+  return 1;
+}
+
diff --git a/c/c_tests/test_5bg2bw.sh b/c/c_tests/test_5bg2bw.sh
new file mode 100755
index 0000000..23dc60c
--- /dev/null
+++ b/c/c_tests/test_5bg2bw.sh
@@ -0,0 +1,73 @@
+#!/bin/bash
+
+########## LICENSE ##########
+# Copyright (c) 2016 Genome Research Ltd.
+#
+# Author: Cancer Genome Project cgpit@sanger.ac.uk
+#
+# This file is part of cgpBigWig.
+#
+# cgpBigWig is free software: you can redistribute it and/or modify it under
+# the terms of the GNU Affero General Public License as published by the Free
+# Software Foundation; either version 3 of the License, or (at your option) any
+# later version.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
+# details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+# 1. The usage of a range of years within a copyright statement contained within
+# this distribution should be interpreted as being equivalent to a list of years
+# including the first and last year specified and all consecutive years between
+# them. For example, a copyright statement that reads ‘Copyright (c) 2005, 2007-
+# 2009, 2011-2012’ should be interpreted as being identical to a statement that
+# reads ‘Copyright (c) 2005, 2007, 2008, 2009, 2011, 2012’ and a copyright
+# statement that reads ‘Copyright (c) 2005-2012’ should be interpreted as being
+# identical to a statement that reads ‘Copyright (c) 2005, 2006, 2007, 2008,
+# 2009, 2010, 2011, 2012’."
+#
+###########################
+
+# Step 1: build a bigWig from the expected bedgraph.
+../bin/bg2bw -i ../test_data/volvox-sorted.coverage.expected.bed -o ../test_data/bg2bw.got.bw -c ../test_data/volvox.fa.fai;
+if [ "$?" != "0" ]; then
+  echo "ERROR in $0: Running bg2bw"
+  echo "------"
+  rm -f ../test_data/bg2bw.got.bw
+  exit 1
+fi
+
+# Step 2: dump the generated bigWig back to text with bwcat.
+../bin/bwcat -i ../test_data/bg2bw.got.bw > ../test_data/bg2bw.got.bed;
+if [ "$?" != "0" ]; then
+  echo "ERROR in $0: Running bwcat"
+  echo "------"
+  rm -f ../test_data/bg2bw.got.bw ../test_data/bg2bw.got.bed
+  exit 1
+fi
+
+# Step 3: dump the reference bigWig the same way.
+../bin/bwcat -i ../test_data/volvox-sorted.coverage.expected.bw > ../test_data/volvox-sorted.coverage.expected.out;
+if [ "$?" != "0" ]; then
+  echo "ERROR in $0: Running bwcat"
+  echo "------"
+  rm -f ../test_data/bg2bw.got.bw ../test_data/bg2bw.got.bed ../test_data/volvox-sorted.coverage.expected.out
+  exit 1
+fi
+
+
+
+# Step 4: the two dumps must be identical.
+diff ../test_data/bg2bw.got.bed ../test_data/volvox-sorted.coverage.expected.out;
+if [ "$?" != "0" ]; then
+  echo "ERROR in $0: Total file comparisons don't match"
+  echo "------"
+  rm -f ../test_data/bg2bw.got.bed ../test_data/bg2bw.got.bw ../test_data/volvox-sorted.coverage.expected.out
+  exit 1
+fi
+
+rm -f ../test_data/bg2bw.got.bed ../test_data/bg2bw.got.bw ../test_data/volvox-sorted.coverage.expected.out
\ No newline at end of file
diff --git a/setup.sh b/setup.sh
index 04c5a80..bd83b23 100755
--- a/setup.sh
+++ b/setup.sh
@@ -156,6 +156,7 @@ else
   cp bin/bam2bw $INST_PATH/bin/.
   cp bin/bwcat $INST_PATH/bin/.
   cp bin/bam2bwbases $INST_PATH/bin/.
+  cp bin/bg2bw $INST_PATH/bin/.
touch $SETUP_DIR/cgpBigWig.success # need to clean up as will clash with other version make -C c clean From 800b9975b418a2e7d2fa7309af9981eec209a0dc Mon Sep 17 00:00:00 2001 From: David Jones Date: Thu, 15 Sep 2016 16:00:55 +0100 Subject: [PATCH 2/3] Added changes.md --- CHANGES.md | 2 ++ README.md | 17 +++++++++++++++++ 2 files changed, 19 insertions(+) create mode 100644 CHANGES.md diff --git a/CHANGES.md b/CHANGES.md new file mode 100644 index 0000000..5b85890 --- /dev/null +++ b/CHANGES.md @@ -0,0 +1,2 @@ +###0.1.0 +* First release \ No newline at end of file diff --git a/README.md b/README.md index 02f0471..a3d4e53 100644 --- a/README.md +++ b/README.md @@ -31,6 +31,8 @@ compilation libraries and tools to be available, most are listed in [`INSTALL`]( [bam2bw](#bam2bw) - Generate bigwig (.bw) coverage file from bam +[bg2bw](#bg2bw) - Generate bigwig (.bw) coverage file from bedgraph (.bed) format + [bam2bwbases](#bam2bwbases) - Generate bigwig (.bw) proportion file of each base at a position from bam [bam2bedgraph](#bam2bedgraph) - Generate a coverage bedgraph from bam @@ -87,6 +89,21 @@ Other: -v --version Prints the version number. ``` +##### bg2bw +Generate bw coverage file from bedgraph (.bed) format +``` +Usage: bg2bw -i input.bed -c chrom.list -o output.bw +bg2bw can be used to generate a bw file from a bedgraph file. + +-i --input [file] Path to the input bedgraph (.bed) file. +-o --outfile [file] Path to the output .bw file produced. [default:'output.bw'] +-c --chromList [file] Path to chrom.list a .tsv where first two columns are contig name and length. + +Other: +-h --help Display this usage information. +-v --version Prints the version number.
+``` + ##### bam2bwbases Generate bw proportion file of each base at a position from bam ``` From bf901c5138a879fe25574830cecf0ced8a0a720a Mon Sep 17 00:00:00 2001 From: David Jones Date: Thu, 15 Sep 2016 16:11:38 +0100 Subject: [PATCH 3/3] Updated version and CHANGES.md --- CHANGES.md | 3 +++ VERSION.txt | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/CHANGES.md b/CHANGES.md index 5b85890..46ced24 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,2 +1,5 @@ +###0.2.0 +* Added bedgraph to bw functionality via bg2bw + ###0.1.0 * First release \ No newline at end of file diff --git a/VERSION.txt b/VERSION.txt index 6e8bf73..0ea3a94 100644 --- a/VERSION.txt +++ b/VERSION.txt @@ -1 +1 @@ -0.1.0 +0.2.0