Skip to content

Commit

Permalink
Merge pull request #3 from cancerit/feature/bedgraph2bigwig
Browse files Browse the repository at this point in the history
Feature/bedgraph2bigwig
  • Loading branch information
David Jones authored Sep 15, 2016
2 parents 4b89063 + 800b997 commit 26292d3
Show file tree
Hide file tree
Showing 6 changed files with 312 additions and 3 deletions.
2 changes: 2 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
### 0.1.0
* First release
17 changes: 17 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@ compilation libraries and tools to be available, most are listed in [`INSTALL`](

[bam2bw](#bam2bw) - Generate bigwig (.bw) coverage file from bam

[bg2bw](#bg2bw) - Generate bigwig (.bw) coverage file from bedgraph (.bed) format

[bam2bwbases](#bam2bwbases) - Generate bigwig (.bw) proportion file of each base at a position from bam

[bam2bedgraph](#bam2bedgraph) - Generate a coverage bedgraph from bam
Expand Down Expand Up @@ -87,6 +89,21 @@ Other:
-v --version Prints the version number.
```

##### bg2bw
Generate bw coverage file from bedgraph (.bed) format
```
Usage: bg2bw -i input.bed -c chrom.list -o output.bw
bg2bw can be used to generate a bw file from a bedgraph file.
-i --input [file] Path to the input bedgraph (.bed) file.
-o --outfile [file] Path to the output .bw file produced. [default:'output.bw']
-c --chromList [file] Path to chrom.list a .tsv where first two columns are contig name and length.
Other:
-h --help Display this usage information.
-v --version Prints the version number.
```

##### bam2bwbases
Generate bw proportion file of each base at a position from bam
```
Expand Down
14 changes: 11 additions & 3 deletions c/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ BAM2BG_TARGET=../bin/bam2bedgraph
BAM2BW_TARGET=../bin/bam2bw
BAM2BASES_TARGET=../bin/bam2bwbases
CAT_TARGET=../bin/bwcat
BG2BW_TARGET=../bin/bg2bw
make_BW=../bin/makebw


Expand All @@ -83,7 +84,7 @@ make_BW=../bin/makebw

.NOTPARALLEL: test

all: clean pre make_htslib_tmp $(BAM2BG_TARGET) $(BAM2BW_TARGET) $(BAM2BASES_TARGET) remove_htslib_tmp $(JOIN_TARGET) $(CAT_TARGET) test
all: clean pre make_htslib_tmp $(BAM2BG_TARGET) $(BAM2BW_TARGET) $(BAM2BASES_TARGET) remove_htslib_tmp $(JOIN_TARGET) $(CAT_TARGET) $(BG2BW_TARGET) test
@echo bwcat, bwjoin, bam2bedgraph, bam2bw and bam2bwbases compiled.

$(CAT_TARGET): $(OBJS)
Expand All @@ -101,12 +102,16 @@ $(BAM2BASES_TARGET): $(OBJS)
$(BAM2BG_TARGET): $(OBJS)
$(CC) $(CFLAGS) $(JOIN_INCLUDES) $(INCLUDES) ./bam2bedgraph.c $(OBJS) $(LFLAGS) $(LIBS) -o $(BAM2BG_TARGET)

$(BG2BW_TARGET): $(OBJS)
$(CC) $(JOIN_INCLUDES) $(INCLUDES) $(CFLAGS) ./bg2bw.c $(OBJS) $(LFLAGS) $(CAT_LFLAGS) $(LIBS) $(LIBBWLIBS) -o $(BG2BW_TARGET)


pre:
mkdir ../bin


#Unit Tests
test: $(BAM2BG_TARGET) $(BAM2BW_TARGET) $(BAM2BASES_TARGET) $(JOIN_TARGET) $(CAT_TARGET)
test: $(BAM2BG_TARGET) $(BAM2BW_TARGET) $(BAM2BASES_TARGET) $(JOIN_TARGET) $(CAT_TARGET) $(BG2BW_TARGET)
test: CFLAGS += $(JOIN_INCLUDES) $(INCLUDES) -I./ $(OBJS) $(LFLAGS) $(LIBS) $(CAT_LFLAGS)
test: $(TESTS)
sh ./c_tests/runtests.sh
Expand All @@ -130,6 +135,9 @@ make_bwcat: $(CAT_TARGET)
make_bwjoin: $(JOIN_TARGET)
@echo $(JOIN_TARGET) done

make_bg2bw: $(BG2BW_TARGET)
@echo $(BG2BW_TARGET) done

make_htslib_tmp:
$(MD) $(HTSTMP)
#Do some magic to ensure we compile with the static libhts.a rather than libhts.so
Expand All @@ -156,7 +164,7 @@ valgrind:

clean:
@echo clean
$(RM) ./*.o *~ $(BAM2BG_TARGET) $(BAM2BW_TARGET) $(BAM2BASES_TARGET) $(JOIN_TARGET) $(CAT_TARGET) ./tests/tests_log $(TESTS) ./*.gcda ./*.gcov ./*.gcno *.gcda *.gcov *.gcno ./c_tests/*.gcda ./c_tests/*.gcov ./c_tests/*.gcno
$(RM) ./*.o *~ $(BAM2BG_TARGET) $(BAM2BW_TARGET) $(BG2BW_TARGET) $(BAM2BASES_TARGET) $(JOIN_TARGET) $(CAT_TARGET) ./tests/tests_log $(TESTS) ./*.gcda ./*.gcov ./*.gcno *.gcda *.gcov *.gcno ./c_tests/*.gcda ./c_tests/*.gcov ./c_tests/*.gcno
-rm -rf $(HTSTMP) ../bin

depend: $(SRCS)
Expand Down
208 changes: 208 additions & 0 deletions c/bg2bw.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,208 @@
/** LICENSE
* Copyright (c) 2016 Genome Research Ltd.
*
* Author: Cancer Genome Project [email protected]
*
* This file is part of cgpBigWig.
*
* cgpBigWig is free software: you can redistribute it and/or modify it under
* the terms of the GNU Affero General Public License as published by the Free
* Software Foundation; either version 3 of the License, or (at your option) any
* later version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
* details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* 1. The usage of a range of years within a copyright statement contained within
* this distribution should be interpreted as being equivalent to a list of years
* including the first and last year specified and all consecutive years between
* them. For example, a copyright statement that reads ‘Copyright (c) 2005, 2007-
* 2009, 2011-2012’ should be interpreted as being identical to a statement that
* reads ‘Copyright (c) 2005, 2007, 2008, 2009, 2011, 2012’ and a copyright
* statement that reads ‘Copyright (c) 2005-2012’ should be interpreted as being
* identical to a statement that reads ‘Copyright (c) 2005, 2006, 2007, 2008,
* 2009, 2010, 2011, 2012’."
*
*/

#include <getopt.h>
#include <stdlib.h>
#include <stdio.h>
#include "bigWig.h"
#include "utils.h"
#include "dbg.h"

char *input_file = NULL;
char *out_file = "output.bw";
char *chrlist = NULL;

void print_usage (int exit_code){
printf("Usage: bg2bw -i input.bed -c chrom.list -o output.bw\n");
printf("bg2bw can be used to generate a bw file from a bedgraph file.\n\n");
printf("-i --input [file] Path to the input [b|cr]am file.\n");
printf("-o --outfile [file] Path to the output .bw file produced. [default:'%s']\n",out_file);
printf("-c --chromList [file] Path to chrom.list a .tsv where first two columns are contig name and length.\n\n");
printf ("Other:\n");
printf("-h --help Display this usage information.\n");
printf("-v --version Prints the version number.\n\n");

exit(exit_code);
}

void setup_options(int argc, char *argv[]){
const struct option long_opts[] =
{
{"input", required_argument, 0, 'i'},
{"outfile",required_argument, 0, 'o'},
{"chromList",required_argument, 0, 'c'},
{"help", no_argument, 0, 'h'},
{"version", no_argument, 0, 'v'},
{ NULL, 0, NULL, 0}

}; //End of declaring opts

int index = 0;
int iarg = 0;

//Iterate through options
while((iarg = getopt_long(argc, argv, "c:i:o:hv",long_opts, &index)) != -1){
switch(iarg){
case 'i':
input_file = optarg;
if(check_exist(input_file) != 1){
fprintf(stderr,"Input bed file %s does not appear to exist.\n",input_file);
print_usage(1);
}
break;

case 'c':
chrlist = optarg;
if(check_exist(chrlist) != 1){
fprintf(stderr,"Input chrlist file %s does not appear to exist.\n",chrlist);
print_usage(1);
}
break;
case 'o':
out_file = optarg;
break;
case 'h':
print_usage (0);
break;
case 'v':
print_version (0);
break;
case '?':
print_usage (1);
break;
default:
print_usage (0);
}; // End of args switch statement

}//End of iteration through options

if(input_file==NULL){
fprintf(stderr,"Required option -i|--input not defined.\n");
print_usage(1);
}

if(chrlist==NULL){
fprintf(stderr,"Required option -c|--chrList not defined.\n");
print_usage(1);
}

return;
}

chromList_t *parse_chrom_list(char *chrom_list_file){
chromList_t *cl = NULL;
FILE *in = NULL;
char **contigs = NULL;
uint32_t *lengths = NULL;
int ln_count = line_count(chrom_list_file);
contigs = malloc(sizeof(char *) * ln_count);
check_mem(contigs);
lengths = malloc(sizeof(uint32_t *) * ln_count);
check_mem(lengths);
int chr_count = 0;
in = fopen(chrom_list_file,"r");
check(in!=NULL,"Error opening chrom list file '%s' for reading.",chrom_list_file);
char line[2048];
//Iterate through each line in the chromlist file
while(fgets(line, sizeof(line),in)){
char *cntg = malloc(sizeof(char *) * 2048);
check_mem(cntg);
uint32_t len;
int no = sscanf(line,"%[^\t]\t%"SCNu32"\t",cntg,&len);
check(no==2,"Error extracting contig name and length from line %s.",line);
contigs[chr_count] = cntg;
lengths[chr_count] = len;
chr_count++;
}
check(chr_count==ln_count,"Error contigs found %d != line_count %d",chr_count,ln_count);
fclose(in);
cl = bwCreateChromList(contigs, lengths, chr_count);
check(cl!=NULL,"Error creating ChromList.");
return cl;

error:
if(in) fclose(in);
if(contigs) free(contigs);
if(lengths) free(lengths);
return NULL;
}

int main(int argc, char *argv[]){
setup_options(argc, argv);
FILE *in = NULL;
bigWigFile_t *fp = NULL;
char *ctg = NULL;
in = fopen(input_file,"r");
check(in != NULL, "Error opeining input bed file '%s' to read.",input_file);

//Read in the chromlist
int chk = bwInit(1<<17);
check(chk==0,"Received an error in bwInit: %d",chk);

fp = bwOpen(out_file, NULL, "w");
check(fp!=NULL,"Error opening output bw file '%s' for writing.",out_file);

chk = bwCreateHdr(fp, 10);
check(chk==0,"Error creating bw header: %d.",chk);

fp->cl = parse_chrom_list(chrlist);
check(fp->cl!=NULL,"Error parsing chrom list for contig information.");

chk = bwWriteHdr(fp);
check(chk==0,"Error writing bw header: %d.",chk);

char line[2048];
int num = 0;
ctg = malloc(sizeof(char) * 2048);
uint32_t start;
uint32_t stop;
float res;
while(fgets(line,sizeof(line),in)){
num = sscanf(line,"%[^\t]\t%"SCNu32"\t%"SCNu32"\t%f\n",ctg,&start,&stop,&res);
check(num==4,"Error parsing bed line '%s' to bw format.",line);
chk = bwAddIntervals(fp, &ctg, &start, &stop, &res, 1);
check(chk==0,"Error encountered adding bed line '%s' to bw file: %d.",line,chk);
}

free(ctg);
fclose(in);
bwClose(fp);
bwCleanup();
return 0;
error:
if(fp) bwClose(fp);
bwCleanup();
if(ctg) free(ctg);
if(in) fclose(in);
return 1;
}

73 changes: 73 additions & 0 deletions c/c_tests/test_5bg2bw.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
#!/bin/bash

########## LICENSE ##########
# Copyright (c) 2016 Genome Research Ltd.
#
# Author: Cancer Genome Project [email protected]
#
# This file is part of cgpBigWig.
#
# cgpBigWig is free software: you can redistribute it and/or modify it under
# the terms of the GNU Affero General Public License as published by the Free
# Software Foundation; either version 3 of the License, or (at your option) any
# later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
# details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# 1. The usage of a range of years within a copyright statement contained within
# this distribution should be interpreted as being equivalent to a list of years
# including the first and last year specified and all consecutive years between
# them. For example, a copyright statement that reads ‘Copyright (c) 2005, 2007-
# 2009, 2011-2012’ should be interpreted as being identical to a statement that
# reads ‘Copyright (c) 2005, 2007, 2008, 2009, 2011, 2012’ and a copyright
# statement that reads ‘Copyright (c) 2005-2012’ should be interpreted as being
# identical to a statement that reads ‘Copyright (c) 2005, 2006, 2007, 2008,
# 2009, 2010, 2011, 2012’."
#
###########################

../bin/bg2bw -i ../test_data/volvox-sorted.coverage.expected.bed -o ../test_data/bg2bw.got.bw -c ../test_data/volvox.fa.fai;
if [ "$?" != "0" ];
then
echo "ERROR in "$0": Running bw2bg"
echo "------"
rm -f ../test_data/bg2bw.got.bw
exit 1
fi

../bin/bwcat -i ../test_data/bg2bw.got.bw > ../test_data/bg2bw.got.bed;
if [ "$?" != "0" ];
then
echo "ERROR in "$0": Running bwcat"
echo "------"
rm -f ../test_data/bg2bw.got.bw ../test_data/bg2bw.got.bed
exit 1
fi

../bin/bwcat -i ../test_data/volvox-sorted.coverage.expected.bw > ../test_data/volvox-sorted.coverage.expected.out;
if [ "$?" != "0" ];
then
echo "ERROR in "$0": Running bwcat"
echo "------"
rm -f ../test_data/bg2bw.got.bw ../test_data/bg2bw.got.bed ../test_data/volvox-sorted.coverage.expected.out
exit 1
fi



diff ../test_data/bg2bw.got.bed ../test_data/volvox-sorted.coverage.expected.out;
if [ "$?" != "0" ];
then
echo "ERROR in "$0": Total file comparisons don't match"
echo "------"
rm -f ../test_data/bg2bw.got.bed ../test_data/bg2bw.got.bw ../test_data/volvox-sorted.coverage.expected.out
exit 1
fi

rm -f ../test_data/bg2bw.got.bed ../test_data/bg2bw.got.bw ../test_data/volvox-sorted.coverage.expected.out
1 change: 1 addition & 0 deletions setup.sh
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,7 @@ else
cp bin/bam2bw $INST_PATH/bin/.
cp bin/bwcat $INST_PATH/bin/.
cp bin/bam2bwbases $INST_PATH/bin/.
cp bin/bg2bw $INST_PATH/bin/.
touch $SETUP_DIR/cgpBigWig.success
# need to clean up as will clash with other version
make -C c clean
Expand Down

0 comments on commit 26292d3

Please sign in to comment.