Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added option to use all taxa. Added uninstall makefile target. #87

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 11 additions & 3 deletions src/convertNR.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,11 +41,12 @@ int main(int argc, char **argv) {
bool verbose = false;
bool debug = false;
bool addAcc = false;
bool all_taxa = false;

// --------------------- START ------------------------------------------------------------------
// Read command line params
int c;
while ((c = getopt(argc, argv, "ahdvrl:g:t:i:o:")) != -1) {
while ((c = getopt(argc, argv, "ahdvrAl:g:t:i:o:")) != -1) {
switch (c) {
case 'h':
usage(argv[0]);
Expand All @@ -55,6 +56,8 @@ int main(int argc, char **argv) {
verbose = true; break;
case 'a':
addAcc = true; break;
case 'A':
all_taxa = true; break;
case 'l':
list_filename = optarg; break;
case 't':
Expand Down Expand Up @@ -84,7 +87,8 @@ int main(int argc, char **argv) {
parseNodesDmp(*nodes,nodes_file);
nodes_file.close();

if(list_filename.length()==0) {
if(all_taxa) {std::cerr << "Using all taxa, due to '-A' option." << std::endl;}
else if(list_filename.length()==0) {
std::cerr << "No taxa list specified, using Archaea, Bacteria, and Viruses." << std::endl;
include_ids.insert((uint64_t)2);
include_ids.insert((uint64_t)2157);
Expand Down Expand Up @@ -162,6 +166,9 @@ int main(int argc, char **argv) {
inputfile.open(nr_filename);
if(!inputfile.is_open()) { error("Could not open file " + nr_filename); exit(EXIT_FAILURE); }
}
else {
nr_filename = "std::cin";
}

if(verbose) std::cerr << "Writing to file " << out_filename << std::endl;
std::ofstream out_file;
Expand Down Expand Up @@ -209,7 +216,7 @@ int main(int argc, char **argv) {
if(nodes->count(lca)==0) { std::cerr << "Taxon ID " << lca << " not found in taxonomy!" << std::endl; continue; }
uint64_t id = lca;
while(nodes->count(id)>0 && id != 1) {
if(include_ids.count(id) > 0) {
if(all_taxa || include_ids.count(id) > 0) {
keep = true;
break;
}
Expand Down Expand Up @@ -264,6 +271,7 @@ void usage(char *progname) {
fprintf(stderr, " -o FILENAME Name of output file.\n");
fprintf(stderr, "Optional arguments:\n");
fprintf(stderr, " -a Prefix taxon ID in database names with the first Accession.Ver\n");
fprintf(stderr, " -A Use all taxids. This overrides -l and the default (only Archaea, Bacteria, and Viruses)\n");
fprintf(stderr, " -i FILENAME Name of NR file. If this option is not used, then the program will read from STDIN.\n");
fprintf(stderr, " -l FILENAME Name of file containing IDs of taxa that will be extracted from the NR file. The IDs must be contained in nodes.dmp.\n");
exit(EXIT_FAILURE);
Expand Down
5 changes: 4 additions & 1 deletion src/makefile
Original file line number Diff line number Diff line change
Expand Up @@ -73,10 +73,13 @@ convertNR: makefile bwt/mkbwt Config.o convertNR.o util.o $(BLASTOBJS)


clean:
rm -f -v bwt/mkbwt bwt/mkfmi kaiju kaijux kaijup kaiju2krona mergeOutputs kaijuReport convertNR addTaxonNames ../bin/*
rm -f -v bwt/mkbwt bwt/mkfmi kaiju kaijux kaijup kaiju2krona mergeOutputs kaijuReport convertNR addTaxonNames
find . -name "*.o" -delete
$(MAKE) -C bwt/ clean

# uninstall: delete everything matched by ../bin/* (rm -f suppresses errors for
# missing files, -v prints each removed file).
uninstall:
rm -fv ../bin/*

static: LDFLAGS = -static
static: LDLIBS = $(LD_LIBS_STATIC)
static: all
Expand Down
27 changes: 25 additions & 2 deletions util/makeDB.sh
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ db_refseq=0
db_progenomes=0
db_nr=0
db_euk=0
db_nr_all=0
db_mar=0
db_plasmids=0
threadsBWT=5
Expand All @@ -37,6 +38,9 @@ echo
echo "$s" -n NCBI BLAST non-redundant protein database \"nr\":
echo "$tab" only Archaea, bacteria, and viruses
echo
echo "$s" -N NCBI BLAST non-redundant protein database \"nr\":
echo "$tab" all taxa
echo
echo "$s" -e NCBI BLAST non-redundant protein database \"nr\":
echo "$tab" like -n, but additionally including fungi and microbial eukaryotes
echo
Expand Down Expand Up @@ -87,6 +91,9 @@ while :; do
-e|--euk)
db_euk=1
;;
-N|--nra)
db_nr_all=1
;;
-v|--viruses)
db_viruses=1
;;
Expand Down Expand Up @@ -115,7 +122,7 @@ while :; do
shift
done

[ $db_plasmids -eq 1 -o $db_viruses -eq 1 -o $db_refseq -eq 1 -o $db_progenomes -eq 1 -o $db_nr -eq 1 -o $db_euk -eq 1 -o $db_mar -eq 1 ] || { echo "Error: Use one of the options -r, -p, -n, -v, -l, -m, or -e"; usage; exit 1; }
# Require at least one database-selection flag. If none of them is set, print
# an error that lists every accepted flag (including -N), show the usage text,
# and exit with status 1.
[ $db_plasmids -eq 1 -o $db_viruses -eq 1 -o $db_refseq -eq 1 -o $db_progenomes -eq 1 -o $db_nr -eq 1 -o $db_euk -eq 1 -o $db_nr_all -eq 1 -o $db_mar -eq 1 ] || { echo "Error: Use one of the options -r, -p, -n, -N, -v, -l, -m, or -e"; usage; exit 1; }

#check if necessary programs are in the PATH
command -v awk >/dev/null 2>/dev/null || { echo Error: awk not found; exit 1; }
Expand Down Expand Up @@ -188,7 +195,7 @@ then
fi


if [ $db_nr -eq 1 -o $db_euk -eq 1 ]
if [ $db_nr -eq 1 -o $db_euk -eq 1 -o $db_nr_all -eq 1 ]
then
if [ $DL -eq 1 ]
then
Expand Down Expand Up @@ -217,6 +224,22 @@ then
echo Kaiju only needs the files kaiju_db_nr_euk.fmi, nodes.dmp, and names.dmp.
echo The remaining files can be deleted.
echo
elif [ $db_nr_all -eq 1 ]
then
if [ $index_only -eq 0 ]
then
echo Converting NR file to Kaiju database
gunzip -c nr.gz | convertNR -t nodes.dmp -g prot.accession2taxid -a -o kaiju_db_nr_all.faa -A
fi
[ -r kaiju_db_nr_all.faa ] || { echo Missing file kaiju_db_nr_all.faa; exit 1; }
echo Creating BWT from Kaiju database
mkbwt -e $exponentSA_NR -n $threadsBWT -a ACDEFGHIKLMNPQRSTVWY -o kaiju_db_nr_all kaiju_db_nr_all.faa
echo Creating FM-index
mkfmi kaiju_db_nr_all
echo Done!
echo Kaiju only needs the files kaiju_db_nr_all.fmi, nodes.dmp, and names.dmp.
echo The remaining files can be deleted.
echo
else
if [ $index_only -eq 0 ]
then
Expand Down