From ef836f60eb5881ec76851323a7e7a57909269487 Mon Sep 17 00:00:00 2001 From: David Jones Date: Mon, 3 Apr 2017 13:25:29 +0100 Subject: [PATCH] Modify parsing region to allow GRCh38 contig names with : in --- CHANGES.md | 3 +++ VERSION.txt | 2 +- c/Makefile | 2 +- c/bam2bw.c | 13 +------------ c/bam2bwbases.c | 5 +++-- c/bam_access.c | 6 ++---- c/catbw.c | 11 +++++++---- c/utils.c | 12 ++++++++---- c/utils.h | 2 +- 9 files changed, 27 insertions(+), 29 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index dd93cde..cb641b3 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,3 +1,6 @@ +### 0.4.2 +* Modified parsing of regions to enable names with : in (i.e. GRCh38) + ### 0.4.1 * Updated libBigWig to 0.3.1 - fixes issue with overflow in very large zoom levels diff --git a/VERSION.txt b/VERSION.txt index 267577d..2b7c5ae 100644 --- a/VERSION.txt +++ b/VERSION.txt @@ -1 +1 @@ -0.4.1 +0.4.2 diff --git a/c/Makefile b/c/Makefile index 6eb8058..6ca36c3 100644 --- a/c/Makefile +++ b/c/Makefile @@ -85,7 +85,7 @@ make_BW=../bin/makebw .NOTPARALLEL: test -all: clean pre make_htslib_tmp $(BAM2BG_TARGET) $(BAM2BW_TARGET) $(BAM2BASES_TARGET) remove_htslib_tmp $(JOIN_TARGET) $(CAT_TARGET) $(BG2BW_TARGET) $(DEXDEPTH_TARGET) test +all: clean pre make_htslib_tmp $(BAM2BG_TARGET) $(BAM2BW_TARGET) $(BAM2BASES_TARGET) $(JOIN_TARGET) $(CAT_TARGET) $(BG2BW_TARGET) $(DEXDEPTH_TARGET) remove_htslib_tmp test @echo bwcat, bwjoin, bam2bedgraph, bam2bw, detectExtremeDepth and bam2bwbases compiled. 
$(CAT_TARGET): $(OBJS) diff --git a/c/bam2bw.c b/c/bam2bw.c index 21e3607..3278c7d 100644 --- a/c/bam2bw.c +++ b/c/bam2bw.c @@ -219,16 +219,6 @@ bigWigFile_t *initialise_bw_output(char *out_file, chromList_t *chromList){ return NULL; } -char *get_contig_from_region(char *region){ - int beg = 0; - int end = 0; - const char *q = hts_parse_reg(region, &beg, &end); - char *tmp = (char*)malloc(q - region + 1); - strncpy(tmp, region, q - region); - tmp[q - region] = 0; - return tmp; -} - uint32_t getContigLength(char *contig,chromList_t *chromList){ int i=0; for(i=0;i<chromList->nKeys;i++){ @@ -351,8 +341,7 @@ int main(int argc, char *argv[]){ if(include_zeroes == 1){ uint32_t reg_start; uint32_t reg_stop; - char *contig = malloc(sizeof(char) * 2048); - parseRegionString(our_region_list[i], contig, &reg_start, &reg_stop); + char *contig = parseRegionString(our_region_list[i], &reg_start, &reg_stop); uint32_t len = getContigLength(contig,chromList); check(len != -1,"Error fetching length of contig %s.",contig); //Append end of chromosome if zeroes diff --git a/c/bam2bwbases.c b/c/bam2bwbases.c index 307f21e..f7b60ff 100644 --- a/c/bam2bwbases.c +++ b/c/bam2bwbases.c @@ -346,10 +346,10 @@ int main(int argc, char *argv[]){ int chck = 0; uint32_t sta; uint32_t sto; - char *contig = malloc(sizeof(char) * 2048); + char *contig; int i=0; for(i=0;itarget_name[perbase[b].ltid],start,stop,result,perbase[b].ltid,chck); } } + free(contig); bam_hdr_destroy(perbase[0].head); } diff --git a/c/bam_access.c b/c/bam_access.c index 2d0be76..e3a05df 100644 --- a/c/bam_access.c +++ b/c/bam_access.c @@ -237,8 +237,7 @@ int process_bam_region_bases(char *input_file, bw_func_reg perbase_pileup_func, uint32_t last_pos = 0; uint32_t start; uint32_t stop; - char *contig = malloc(sizeof(char) * 2048); - parseRegionString(region, contig, &start, &stop); + char *contig = parseRegionString(region, &start, &stop); int x=0; for(x=0;x<4;x++){ perbase[x].idx = idx; @@ -338,8 +337,7 @@ int process_bam_region(char *input_file, 
bw_func_reg pileup_func, tmpstruct_t *t uint32_t last_pos = 0; uint32_t reg_sta; uint32_t reg_sto; - char *contig = malloc(sizeof(char) * 2048); - parseRegionString(region, contig, &reg_sta, &reg_sto); + char *contig = parseRegionString(region, &reg_sta, &reg_sto); if(reg_sta>1){ tmp->lstart=reg_sta-1; tmp->lpos=reg_sta-1; diff --git a/c/catbw.c b/c/catbw.c index a55579c..90e8b0c 100644 --- a/c/catbw.c +++ b/c/catbw.c @@ -137,10 +137,11 @@ int main(int argc, char *argv[]){ if(region!=NULL){ //Check and parse region - char contig[1024]= ""; - uint32_t start,stop; - int chk = parseRegionString(region,contig,&start,&stop); - check(chk>0,"Error parsing region string '%s'",region); + uint32_t start = 0; + uint32_t stop = 0; + char *contig = NULL; + contig = parseRegionString(region,&start,&stop); + check(contig!=NULL,"Error parsing region string '%s'",region); //retrieve region intervals //intervals = bwGetValues(fp, contig, start, stop, inc_na); intervals = bwGetOverlappingIntervals(fp, contig, start-1, stop); @@ -149,10 +150,12 @@ int main(int argc, char *argv[]){ uint32_t j=0; for(j=0;j<intervals->l;j++){ //print interval + fprintf(stderr,"***INTERVAL****\t%s:%d-%d\n",contig,(intervals->start)[j],(intervals->end)[j]); fprintf(out,out_pattern,contig,(intervals->start)[j],(intervals->end)[j],(intervals->value)[j]); } bwDestroyOverlappingIntervals(intervals); } + free(contig); }else{ //No region so iterate through each contig listed in the header file //Read in the list of chromosomes diff --git a/c/utils.c b/c/utils.c index 0cbe1aa..487af42 100644 --- a/c/utils.c +++ b/c/utils.c @@ -33,6 +33,7 @@ #include #include #include +#include "htslib/sam.h" #include "utils.h" @@ -98,8 +99,11 @@ int line_count (char *file_path){ return -1; } -int parseRegionString(char *region, char *contig, uint32_t *start, uint32_t *stop){ - int check_parse = sscanf(region,region_format,contig,start,stop); - if(check_parse != 3) return 0; - return check_parse; +int parseRegionString(char *region, uint32_t *start, 
uint32_t *stop){ + const char *q = hts_parse_reg(region, start, stop); + char *contig = (char*)malloc(q - region + 1); + strncpy(contig, region, q - region); + contig[q - region] = 0; + *start = *start+1; + return contig; } \ No newline at end of file diff --git a/c/utils.h b/c/utils.h index 99849a1..830696f 100644 --- a/c/utils.h +++ b/c/utils.h @@ -44,6 +44,6 @@ int check_exist(char *fname); int parse_file_name( char *dir, char *fname, const char *file); -int parseRegionString(char *region, char *contig, uint32_t *start, uint32_t *stop); +int parseRegionString(char *region, uint32_t *start, uint32_t *stop); #endif \ No newline at end of file