Use new binary taxonomy for faster reading

soedinglab · Dec 14, 2020 · 74b273f · 74b273f
1 parent f5e3c75
commit 74b273f
Show file tree

Hide file tree

Showing 2 changed files with 5 additions and 25 deletions.
diff --git a/data/createsetdb.sh b/data/createsetdb.sh
@@ -50,24 +50,15 @@ if notExists "${OUTDB}.dbtype"; then
         cp -f "$1_h" "${OUTDB}_h"
         cp -f "$1_h.index" "${OUTDB}_h.index"
         cp -f "$1_h.dbtype" "${OUTDB}_h.dbtype"
-
-#        if [ -z "${REVERSE_FRAGMENTS}" ] && [ -f "${1}_mapping" ]; then
-#            cp -f "${1}_mapping" "${OUTDB}_mapping"
-#            cp -f "${1}_nodes.dmp" "${OUTDB}_nodes.dmp"
-#            cp -f "${1}_names.dmp" "${OUTDB}_names.dmp"
-#            cp -f "${1}_merged.dmp" "${OUTDB}_merged.dmp"
-#        fi
     fi
 fi
 
-
 # if notExists "${OUTDB}"; then
 #     # shellcheck disable=SC2086
 #     "${MMSEQS}" createdb "$@" "${OUTDB}" ${CREATEDB_PAR} \
 #         || fail "createdb failed"
 # fi
 
-
 if [ "$("${MMSEQS}" dbtype "${OUTDB}")" = "Nucleotide" ]; then
     mv -f "${OUTDB}" "${OUTDB}_nucl"
     mv -f "${OUTDB}.index" "${OUTDB}_nucl.index"
@@ -79,13 +70,6 @@ if [ "$("${MMSEQS}" dbtype "${OUTDB}")" = "Nucleotide" ]; then
     mv -f "${OUTDB}_h.index" "${OUTDB}_nucl_h.index"
     mv -f "${OUTDB}_h.dbtype" "${OUTDB}_nucl_h.dbtype"
 
-#    if [ -z "${REVERSE_FRAGMENTS}" ] && [ -f "${OUTDB}_mapping" ]; then
-#        ln -fs "${OUTDB}_mapping" "${OUTDB}_nucl_mapping"
-#        ln -fs "${OUTDB}_nodes.dmp" "${OUTDB}_nucl_nodes.dmp"
-#        ln -fs "${OUTDB}_names.dmp" "${OUTDB}_nucl_names.dmp"
-#        ln -fs "${OUTDB}_merged.dmp" "${OUTDB}_nucl_merged.dmp"
-#    fi
-
     if notExists "${OUTDB}_nucl_contig_to_set.index"; then
         awk '{ print $1"\t"$3; }' "${OUTDB}_nucl.lookup" | sort -k1,1n -k2,2n > "${OUTDB}_nucl_contig_to_set.tsv"
         # shellcheck disable=SC2086
@@ -169,16 +153,12 @@ if [ "$("${MMSEQS}" dbtype "${OUTDB}")" = "Nucleotide" ]; then
         if notExists "${OUTDB}_set_mapping"; then
             awk 'NR == FNR { f[$1] = $2; next } $2 in f { print $1"\t"f[$2] }' \
                 "${TAXMAPPING}" "${OUTDB}.source" > "${OUTDB}_set_mapping"
-            ln -sf "${OUTDB}_nucl_orf_names.dmp" "${OUTDB}_set_names.dmp"
-            ln -sf "${OUTDB}_nucl_orf_nodes.dmp" "${OUTDB}_set_nodes.dmp"
-            ln -sf "${OUTDB}_nucl_orf_merged.dmp" "${OUTDB}_set_merged.dmp"
+            ln -sf "${OUTDB}_nucl_orf_taxonomy" "${OUTDB}_set_taxonomy"
             awk 'BEGIN { printf("%c%c%c%c",18,0,0,0); exit; }' > "${OUTDB}_set.dbtype"
         fi
 
         if notExists "${OUTDB}_nucl_mapping"; then
-            ln -sf "${OUTDB}_nucl_orf_names.dmp" "${OUTDB}_nucl_names.dmp"
-            ln -sf "${OUTDB}_nucl_orf_nodes.dmp" "${OUTDB}_nucl_nodes.dmp"
-            ln -sf "${OUTDB}_nucl_orf_merged.dmp" "${OUTDB}_nucl_merged.dmp"
+            ln -sf "${OUTDB}_nucl_orf_taxonomy" "${OUTDB}_nucl_taxonomy"
 
             # shellcheck disable=SC2086
             "${MMSEQS}" createtaxdb "${OUTDB}_nucl" "${TMP_PATH}" --tax-mapping-mode 1 ${CREATETAXDB_PAR} ${THREADS_PAR} \

diff --git a/src/util/SummarizeResults.cpp b/src/util/SummarizeResults.cpp
@@ -147,9 +147,9 @@ int summarizeresults(int argc, const char **argv, const Command& command) {
                             buffer.append(1, '\t');
                             buffer.append(SSTR(node->taxId));
                             buffer.append(1, '\t');
-                            buffer.append(node->rank);
+                            buffer.append(t->getString(node->rankIdx));
                             buffer.append(1, '\t');
-                            buffer.append(node->name);
+                            buffer.append(t->getString(node->nameIdx));
                             if (!ranks.empty()) {
                                 buffer.append(1, '\t');
                                 buffer.append(Util::implode(t->AtRanks(node, ranks), ';'));
@@ -164,7 +164,7 @@ int summarizeresults(int argc, const char **argv, const Command& command) {
                             }
                         }
                     }
-                    buffer.append("\n");
+                    buffer.append(1, '\n');
                     buffer.append(tmpBuffer);
                 }
                 tmpBuffer.clear();