From 4e9c8afac1864ad708232d51243b2caa6df3ea0c Mon Sep 17 00:00:00 2001 From: Keiran Raine Date: Fri, 21 Jul 2017 14:13:15 +0100 Subject: [PATCH 01/10] #Fixes 22 and skips install of tools already present, no version checks though --- setup.sh | 28 +++++++++++++++++++++++++--- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/setup.sh b/setup.sh index 99013b2..d9d5d6d 100755 --- a/setup.sh +++ b/setup.sh @@ -1,7 +1,7 @@ #!/bin/bash ##########LICENCE########## -# Copyright (c) 2014-2016 Genome Research Ltd. +# Copyright (c) 2014-2017 Genome Research Ltd. # # Author: Cancer Genome Project cgpit@sanger.ac.uk # @@ -25,6 +25,9 @@ SOURCE_SAMTOOLS="https://github.com/samtools/samtools/releases/download/1.3.1/samtools-1.3.1.tar.bz2" BIODBHTS_INSTALL="https://raw.githubusercontent.com/Ensembl/Bio-HTS/master/INSTALL.pl" SOURCE_VCFTOOLS="https://github.com/vcftools/vcftools/releases/download/v0.1.14/vcftools-0.1.14.tar.gz" +# Warning bedtools 2.24.0 and 2.25.0 have a swapped usage in coverageBed +# No upgrades until [this ticket](https://github.com/arq5x/bedtools2/issues/319) is resolved +SOURCE_BEDTOOLS="https://github.com/arq5x/bedtools2/releases/download/v2.21.0/bedtools-2.21.0.tar.gz" get_distro () { EXT="" @@ -62,7 +65,7 @@ if [[ ($# -ne 1 && $# -ne 2) ]] ; then echo "Please provide an installation path and optionally perl lib paths to allow, e.g." echo " ./setup.sh /opt/myBundle" echo "OR all elements versioned:" - echo " ./setup.sh /opt/cgpVcf-X.X.X /opt/PCAP-X.X.X/lib/perl" + echo " ./setup.sh /opt/myBundle /opt/cgpVcf-X.X.X/lib/perl5:/opt/PCAP-X.X.X/lib/perl5" exit 0 fi @@ -127,11 +130,28 @@ for i in "${perlmods[@]}" ; do $CPANM --notest --mirror http://cpan.metacpan.org -l $INST_PATH $i done +echo -n "Building bedtools2 ..." 
+if [ -e $SETUP_DIR/bedtools.success ]; then + echo -n " previously installed (resumed)..."; +elif [ -e $INST_PATH/bin/bedtools ]; then + echo -n " previously installed ..."; +else + cd $SETUP_DIR + get_distro "bedtools2" $SOURCE_BEDTOOLS + mkdir -p bedtools2 + tar --strip-components 1 -C bedtools2 -zxf bedtools2.tar.gz + make -C bedtools2 -j$CPU + cp bedtools2/bin/* $INST_PATH/bin/. + touch $SETUP_DIR/bedtools.success +fi + CURR_TOOL="vcftools" CURR_SOURCE=$SOURCE_VCFTOOLS echo -n "Building $CURR_TOOL ..." if [ -e $SETUP_DIR/$CURR_TOOL.success ]; then - echo -n " previously installed ..." + echo -n " previously installed (resumed) ..." +elif [ -e $INST_PATH/bin/$CURR_TOOL ]; then + echo -n " previously installed ..."; else get_distro $CURR_TOOL $CURR_SOURCE cd $SETUP_DIR/$CURR_TOOL @@ -144,6 +164,8 @@ fi echo -n "Building samtools ..." if [ -e "$SETUP_DIR/samtools.success" ]; then + echo -n " previously installed (resumed) ..."; +elif [ -e $INST_PATH/bin/samtools ]; then echo -n " previously installed ..."; else cd $SETUP_DIR From 1433704cc5bb2dba5bfe8026ca4b3c5b360c55e1 Mon Sep 17 00:00:00 2001 From: Keiran Raine Date: Fri, 21 Jul 2017 14:34:31 +0100 Subject: [PATCH 02/10] bump the version --- lib/Sanger/CGP/Vagrent.pm | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/Sanger/CGP/Vagrent.pm b/lib/Sanger/CGP/Vagrent.pm index 51b8a3b..2bf1a5f 100644 --- a/lib/Sanger/CGP/Vagrent.pm +++ b/lib/Sanger/CGP/Vagrent.pm @@ -1,7 +1,7 @@ package Sanger::CGP::Vagrent; ##########LICENCE########## -# Copyright (c) 2014-2016 Genome Research Ltd. +# Copyright (c) 2014-2017 Genome Research Ltd. 
# # Author: Cancer Genome Project cgpit@sanger.ac.uk # @@ -26,7 +26,7 @@ use strict; use Const::Fast qw(const); use base 'Exporter'; -our $VERSION = '3.2.1'; +our $VERSION = '3.2.3'; our @EXPORT = qw($VERSION); 1; From 9d67873f0253a5e0b70cf675548c2798de5f8950 Mon Sep 17 00:00:00 2001 From: Keiran Raine Date: Fri, 21 Jul 2017 14:58:54 +0100 Subject: [PATCH 03/10] Fix the inadvertent 'always sort' and change from vcf-sort to one which works correctly with indels --- bin/AnnotateVcf.pl | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/bin/AnnotateVcf.pl b/bin/AnnotateVcf.pl index 826306f..195eadd 100755 --- a/bin/AnnotateVcf.pl +++ b/bin/AnnotateVcf.pl @@ -34,6 +34,7 @@ use List::Util qw(first); use File::Temp qw(tempfile); +use File::Copy qw(copy); use Try::Tiny qw(try catch); use FindBin qw($Bin); @@ -78,7 +79,7 @@ const my $REPRE_BM => Sanger::CGP::Vagrent::Bookmarkers::RepresentativeTranscriptBookmarker->new(); const my $WORST_BM => Sanger::CGP::Vagrent::Bookmarkers::MostDeleteriousBookmarker->new(); -const my $SORT_CMD => 'cat %s | vcf-sort > %s'; +const my $SORT_CMD => q{(grep -B 100000000 -m 1 '^#CHROM' %s ; grep -v '^#' %s | sort -k1,1 -k2,2n -k4,4 -k5,5) > %s}; const my $BGZIP_CMD => 'bgzip %s'; const my $TABIX_CMB => 'tabix -p vcf %s'; @@ -119,14 +120,17 @@ sub compressAndIndex { my ($options, $tmpfile) = @_; - my $sort_cmd = sprintf $SORT_CMD, $tmpfile, $options->{'output'}; + my $sort_cmd = sprintf $SORT_CMD, $tmpfile, $tmpfile, $options->{'output'}; my $bgzip_cmd = sprintf $BGZIP_CMD, $options->{'output'}; my $totabix = $options->{'output'} .'.gz'; my $tabix_cmd = sprintf $TABIX_CMB, $totabix; try { - my $tabix_in = $options->{'input'}.'.tbx'; - unless(-e $tabix_in){ + my $tabix_in = $options->{'input'}.'.tbi'; + if(-e $tabix_in) { + copy($tmpfile, $options->{'output'}); + } + else { # If the input has a tabix index it must have already been sorted, # we haven't changed the order of the file so we can skip this sort 
system($sort_cmd); From 42074f507e001de4de234af500db36cd56359e1c Mon Sep 17 00:00:00 2001 From: Keiran Raine Date: Fri, 21 Jul 2017 14:58:54 +0100 Subject: [PATCH 04/10] Fixes #23 also fix the inadvertent 'always sort' --- bin/AnnotateVcf.pl | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/bin/AnnotateVcf.pl b/bin/AnnotateVcf.pl index 826306f..195eadd 100755 --- a/bin/AnnotateVcf.pl +++ b/bin/AnnotateVcf.pl @@ -34,6 +34,7 @@ use List::Util qw(first); use File::Temp qw(tempfile); +use File::Copy qw(copy); use Try::Tiny qw(try catch); use FindBin qw($Bin); @@ -78,7 +79,7 @@ const my $REPRE_BM => Sanger::CGP::Vagrent::Bookmarkers::RepresentativeTranscriptBookmarker->new(); const my $WORST_BM => Sanger::CGP::Vagrent::Bookmarkers::MostDeleteriousBookmarker->new(); -const my $SORT_CMD => 'cat %s | vcf-sort > %s'; +const my $SORT_CMD => q{(grep -B 100000000 -m 1 '^#CHROM' %s ; grep -v '^#' %s | sort -k1,1 -k2,2n -k4,4 -k5,5) > %s}; const my $BGZIP_CMD => 'bgzip %s'; const my $TABIX_CMB => 'tabix -p vcf %s'; @@ -119,14 +120,17 @@ sub compressAndIndex { my ($options, $tmpfile) = @_; - my $sort_cmd = sprintf $SORT_CMD, $tmpfile, $options->{'output'}; + my $sort_cmd = sprintf $SORT_CMD, $tmpfile, $tmpfile, $options->{'output'}; my $bgzip_cmd = sprintf $BGZIP_CMD, $options->{'output'}; my $totabix = $options->{'output'} .'.gz'; my $tabix_cmd = sprintf $TABIX_CMB, $totabix; try { - my $tabix_in = $options->{'input'}.'.tbx'; - unless(-e $tabix_in){ + my $tabix_in = $options->{'input'}.'.tbi'; + if(-e $tabix_in) { + copy($tmpfile, $options->{'output'}); + } + else { # If the input has a tabix index it must have already been sorted, # we haven't changed the order of the file so we can skip this sort system($sort_cmd); From e48c5f62fdee6eccd924267813b73b1ed8da462f Mon Sep 17 00:00:00 2001 From: Keiran Raine Date: Fri, 21 Jul 2017 15:02:39 +0100 Subject: [PATCH 05/10] Update licence dates as file changed --- bin/AnnotateVcf.pl | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/bin/AnnotateVcf.pl b/bin/AnnotateVcf.pl index 195eadd..b70615a 100755 --- a/bin/AnnotateVcf.pl +++ b/bin/AnnotateVcf.pl @@ -1,7 +1,7 @@ #!/usr/bin/perl ##########LICENCE########## -# Copyright (c) 2014 Genome Research Ltd. +# Copyright (c) 2014-2017 Genome Research Ltd. # # Author: Cancer Genome Project cgpit@sanger.ac.uk # From 6641385669809a337aa7e41886f8d4b1b32cd96d Mon Sep 17 00:00:00 2001 From: Keiran Raine Date: Fri, 21 Jul 2017 15:10:28 +0100 Subject: [PATCH 06/10] VAGrENT doesn't depend on PCAP so slightly different HTS lib build if not found --- .travis.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.travis.yml b/.travis.yml index 59edec1..a9f58af 100644 --- a/.travis.yml +++ b/.travis.yml @@ -15,6 +15,7 @@ addons: - libncurses5-dev - libpstreams-dev - libcurl4-openssl-dev + - liblzma-dev install: true From 563a5d4448d4904bf45a6950240e2d32c4bcd411 Mon Sep 17 00:00:00 2001 From: Keiran Raine Date: Sat, 22 Jul 2017 07:25:38 +0100 Subject: [PATCH 07/10] Fixup hts* builds, more streamlined --- setup.sh | 105 +++++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 87 insertions(+), 18 deletions(-) diff --git a/setup.sh b/setup.sh index d9d5d6d..26484f9 100755 --- a/setup.sh +++ b/setup.sh @@ -21,9 +21,9 @@ # along with this program. If not, see . 
##########LICENCE########## - SOURCE_SAMTOOLS="https://github.com/samtools/samtools/releases/download/1.3.1/samtools-1.3.1.tar.bz2" -BIODBHTS_INSTALL="https://raw.githubusercontent.com/Ensembl/Bio-HTS/master/INSTALL.pl" +SOURCE_HTSLIB="https://github.com/samtools/htslib/releases/download/1.3.2/htslib-1.3.2.tar.bz2" +SOURCE_BIOBDHTS="https://github.com/Ensembl/Bio-HTS/archive/2.3.tar.gz" SOURCE_VCFTOOLS="https://github.com/vcftools/vcftools/releases/download/v0.1.14/vcftools-0.1.14.tar.gz" # Warning bedtools 2.24.0 and 2.25.0 have a swapped usage in coverageBed # No upgrades until [this ticket](https://github.com/arq5x/bedtools2/issues/319) is resolved @@ -162,33 +162,102 @@ else touch $SETUP_DIR/$CURR_TOOL.success fi -echo -n "Building samtools ..." -if [ -e "$SETUP_DIR/samtools.success" ]; then - echo -n " previously installed (resumed) ..."; -elif [ -e $INST_PATH/bin/samtools ]; then - echo -n " previously installed ..."; +if [ -e $SETUP_DIR/htslibGet.success ]; then + echo " already staged ..."; else + echo cd $SETUP_DIR - get_distro "samtools" $SOURCE_SAMTOOLS - cd samtools + get_distro "htslib" $SOURCE_HTSLIB + touch $SETUP_DIR/htslibGet.success +fi + +echo -n "Building htslib ..." +if [ -e $SETUP_DIR/htslib.success ]; then + echo " previously installed ..."; +else + echo + mkdir -p htslib + tar --strip-components 1 -C htslib -jxf htslib.tar.bz2 + cd htslib ./configure --enable-plugins --enable-libcurl --prefix=$INST_PATH - make all all-htslib - make install install-htslib - touch $SETUP_DIR/samtools.success + make -j$CPU + make install + cd $SETUP_DIR + touch $SETUP_DIR/htslib.success fi +export HTSLIB=$INST_PATH + CHK=`perl -le 'eval "require $ARGV[0]" and print $ARGV[0]->VERSION' Bio::DB::HTS` if [[ "x$CHK" == "x" ]] ; then echo -n "Building Bio::DB::HTS ..." 
- cd $SETUP_DIR - # now Bio::DB::HTS - get_file "INSTALL.pl" $BIODBHTS_INSTALL - perl -I $PERL5LIB INSTALL.pl --prefix $INST_PATH --static - rm -f BioDbHTS_INSTALL.pl + if [ -e $SETUP_DIR/biohts.success ]; then + echo " previously installed ..."; + else + echo + cd $SETUP_DIR + cpanm --no-interactive --notest --mirror http://cpan.metacpan.org -l $INST_PATH Module::Build + cpanm --no-interactive --notest --mirror http://cpan.metacpan.org -l $INST_PATH Bio::Root::Version + rm -rf bioDbHts + get_distro "bioDbHts" $SOURCE_BIOBDHTS + tar --strip-components 1 -C bioDbHts -zxf bioDbHts.tar.gz + cd bioDbHts + perl Build.PL --install_base=$INST_PATH --htslib=$INST_PATH + ./Build test + ./Build install + cd $SETUP_DIR + rm -f bioDbHts.tar.gz + touch $SETUP_DIR/biohts.success + fi else - echo "Bio::DB::HTS already installed" + echo "Bio::DB::HTS already installed ..." fi +if [ -e $SETUP_DIR/samtools.success ]; then + echo " previously installed ..."; +else +echo + cd $SETUP_DIR + rm -rf samtools + get_distro "samtools" $SOURCE_SAMTOOLS + mkdir -p samtools + tar --strip-components 1 -C samtools -xjf samtools.tar.bz2 + cd samtools + ./configure --enable-plugins --enable-libcurl --with-htslib=$HTSLIB --prefix=$INST_PATH + make -j$CPU all + make install + cd $SETUP_DIR + rm -f samtools.tar.bz2 + touch $SETUP_DIR/samtools.success +fi + +# echo -n "Building samtools ..." +# if [ -e "$SETUP_DIR/samtools.success" ]; then +# echo -n " previously installed (resumed) ..."; +# elif [ -e $INST_PATH/bin/samtools ]; then +# echo -n " previously installed ..."; +# else +# cd $SETUP_DIR +# get_distro "samtools" $SOURCE_SAMTOOLS +# cd samtools +# ./configure --enable-plugins --enable-libcurl --prefix=$INST_PATH +# make all all-htslib +# make install install-htslib +# touch $SETUP_DIR/samtools.success +# fi +# +# CHK=`perl -le 'eval "require $ARGV[0]" and print $ARGV[0]->VERSION' Bio::DB::HTS` +# if [[ "x$CHK" == "x" ]] ; then +# echo -n "Building Bio::DB::HTS ..." 
+# cd $SETUP_DIR +# # now Bio::DB::HTS +# get_file "INSTALL.pl" $BIODBHTS_INSTALL +# perl -I $PERL5LIB INSTALL.pl --prefix $INST_PATH --static +# rm -f BioDbHTS_INSTALL.pl +# else +# echo "Bio::DB::HTS already installed" +# fi + cd $INIT_DIR echo -n "Installing Perl prerequisites ..." From 6434a920aaeea4536a2d95726f5ffc09712450f2 Mon Sep 17 00:00:00 2001 From: Keiran Raine Date: Sat, 22 Jul 2017 07:34:46 +0100 Subject: [PATCH 08/10] Remove duplicate install of BioPerl resulting in upgrade to untested --- setup.sh | 31 +------------------------------ 1 file changed, 1 insertion(+), 30 deletions(-) diff --git a/setup.sh b/setup.sh index 26484f9..1576bf7 100755 --- a/setup.sh +++ b/setup.sh @@ -123,7 +123,7 @@ perl $SETUP_DIR/cpanm -l $INST_PATH App::cpanminus CPANM=`which cpanm` echo $CPANM -perlmods=( "File::ShareDir" "File::ShareDir::Install" "Bio::Root::Version@1.006924") +perlmods=( "File::ShareDir" "File::ShareDir::Install" "Module::Build" "Bio::Root::Version@1.006924") for i in "${perlmods[@]}" ; do echo -n "Installing build prerequisite $i..." @@ -196,8 +196,6 @@ if [[ "x$CHK" == "x" ]] ; then else echo cd $SETUP_DIR - cpanm --no-interactive --notest --mirror http://cpan.metacpan.org -l $INST_PATH Module::Build - cpanm --no-interactive --notest --mirror http://cpan.metacpan.org -l $INST_PATH Bio::Root::Version rm -rf bioDbHts get_distro "bioDbHts" $SOURCE_BIOBDHTS tar --strip-components 1 -C bioDbHts -zxf bioDbHts.tar.gz @@ -231,33 +229,6 @@ echo touch $SETUP_DIR/samtools.success fi -# echo -n "Building samtools ..." 
-# if [ -e "$SETUP_DIR/samtools.success" ]; then -# echo -n " previously installed (resumed) ..."; -# elif [ -e $INST_PATH/bin/samtools ]; then -# echo -n " previously installed ..."; -# else -# cd $SETUP_DIR -# get_distro "samtools" $SOURCE_SAMTOOLS -# cd samtools -# ./configure --enable-plugins --enable-libcurl --prefix=$INST_PATH -# make all all-htslib -# make install install-htslib -# touch $SETUP_DIR/samtools.success -# fi -# -# CHK=`perl -le 'eval "require $ARGV[0]" and print $ARGV[0]->VERSION' Bio::DB::HTS` -# if [[ "x$CHK" == "x" ]] ; then -# echo -n "Building Bio::DB::HTS ..." -# cd $SETUP_DIR -# # now Bio::DB::HTS -# get_file "INSTALL.pl" $BIODBHTS_INSTALL -# perl -I $PERL5LIB INSTALL.pl --prefix $INST_PATH --static -# rm -f BioDbHTS_INSTALL.pl -# else -# echo "Bio::DB::HTS already installed" -# fi - cd $INIT_DIR echo -n "Installing Perl prerequisites ..." From 79240d5ab7d3ac793b78626f4c56f8921e567249 Mon Sep 17 00:00:00 2001 From: Keiran Raine Date: Sat, 22 Jul 2017 07:51:32 +0100 Subject: [PATCH 09/10] fixup docs and earlier addition of lib to travis --- .travis.yml | 1 - CHANGES.md | 8 ++++++++ README.md | 12 +++++++----- 3 files changed, 15 insertions(+), 6 deletions(-) diff --git a/.travis.yml b/.travis.yml index a9f58af..59edec1 100644 --- a/.travis.yml +++ b/.travis.yml @@ -15,7 +15,6 @@ addons: - libncurses5-dev - libpstreams-dev - libcurl4-openssl-dev - - liblzma-dev install: true diff --git a/CHANGES.md b/CHANGES.md index 0c0a10a..a3af7bf 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,3 +1,11 @@ +### 3.2.2 +* Add bedtools2 to `setup.sh` +* Added bedtools2 to `README.md` +* Changes `Bio::DB::HTS`, `samtools` and `HTSlib` install methods. +* Corrected condition indicating sort is required. +* Fixes #23 Changed from vcf-sort to normal linux sort to ensure multiple indels with +same start coord are sorted in a stable way. 
+ ### 3.2.0 * Allows use of ensemblgenomes.org as a datasource * Handle genes without names, and give more useful error message diff --git a/README.md b/README.md index 7d65f1e..639ebc6 100644 --- a/README.md +++ b/README.md @@ -15,12 +15,14 @@ as well as Sequence Ontology terms to classify its consequences. --- -###Dependencies/Install +### Dependencies/Install Some of the code included in this package has dependencies on several packages: * [Samtools v1.3+](https://github.com/samtools/samtools) * [vcftools](https://vcftools.github.io/) * [Bio::DB::HTS](http://search.cpan.org/~rishidev/Bio-DB-HTS/) + * [bedtools2](http://bedtools.readthedocs.io/en/latest/index.html) + * Not >=2.24.0, no upgrades until [this ticket](https://github.com/arq5x/bedtools2/issues/319) is resolved (which may involve code changes) And various perl modules. @@ -30,12 +32,12 @@ Please be aware that this expects basic C compilation libraries and tools to be --- -##Creating a release -####Preparation +## Creating a release +#### Preparation * Commit/push all relevant changes. * Pull a clean version of the repo and use this for the following steps. -####Cutting the release +#### Cutting the release 1. Update `lib/Sanger/CGP/Vagrent.pm` to the correct version. 2. Update `CHANGES.md` to show major items. 3. Run `./prerelease.sh` @@ -47,7 +49,7 @@ Please be aware that this expects basic C compilation libraries and tools to be LICENCE ======= -Copyright (c) 2014-2016 Genome Research Ltd. +Copyright (c) 2014-2017 Genome Research Ltd. 
Author: Cancer Genome Project From 53d26702ca81cabc7c117724ce251d91148f87bc Mon Sep 17 00:00:00 2001 From: Keiran Raine Date: Sat, 22 Jul 2017 08:14:06 +0100 Subject: [PATCH 10/10] Correct version number --- lib/Sanger/CGP/Vagrent.pm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/Sanger/CGP/Vagrent.pm b/lib/Sanger/CGP/Vagrent.pm index 2bf1a5f..6d0c9eb 100644 --- a/lib/Sanger/CGP/Vagrent.pm +++ b/lib/Sanger/CGP/Vagrent.pm @@ -26,7 +26,7 @@ use strict; use Const::Fast qw(const); use base 'Exporter'; -our $VERSION = '3.2.3'; +our $VERSION = '3.2.2'; our @EXPORT = qw($VERSION); 1;