Skip to content

Commit

Permalink
Use new binary taxonomy for faster reading
Browse files Browse the repository at this point in the history
  • Loading branch information
milot-mirdita committed Dec 14, 2020
1 parent f5e3c75 commit 74b273f
Show file tree
Hide file tree
Showing 2 changed files with 5 additions and 25 deletions.
24 changes: 2 additions & 22 deletions data/createsetdb.sh
Original file line number Diff line number Diff line change
Expand Up @@ -50,24 +50,15 @@ if notExists "${OUTDB}.dbtype"; then
cp -f "$1_h" "${OUTDB}_h"
cp -f "$1_h.index" "${OUTDB}_h.index"
cp -f "$1_h.dbtype" "${OUTDB}_h.dbtype"

# if [ -z "${REVERSE_FRAGMENTS}" ] && [ -f "${1}_mapping" ]; then
# cp -f "${1}_mapping" "${OUTDB}_mapping"
# cp -f "${1}_nodes.dmp" "${OUTDB}_nodes.dmp"
# cp -f "${1}_names.dmp" "${OUTDB}_names.dmp"
# cp -f "${1}_merged.dmp" "${OUTDB}_merged.dmp"
# fi
fi
fi


# if notExists "${OUTDB}"; then
# # shellcheck disable=SC2086
# "${MMSEQS}" createdb "$@" "${OUTDB}" ${CREATEDB_PAR} \
# || fail "createdb failed"
# fi


if [ "$("${MMSEQS}" dbtype "${OUTDB}")" = "Nucleotide" ]; then
mv -f "${OUTDB}" "${OUTDB}_nucl"
mv -f "${OUTDB}.index" "${OUTDB}_nucl.index"
Expand All @@ -79,13 +70,6 @@ if [ "$("${MMSEQS}" dbtype "${OUTDB}")" = "Nucleotide" ]; then
mv -f "${OUTDB}_h.index" "${OUTDB}_nucl_h.index"
mv -f "${OUTDB}_h.dbtype" "${OUTDB}_nucl_h.dbtype"

# if [ -z "${REVERSE_FRAGMENTS}" ] && [ -f "${OUTDB}_mapping" ]; then
# ln -fs "${OUTDB}_mapping" "${OUTDB}_nucl_mapping"
# ln -fs "${OUTDB}_nodes.dmp" "${OUTDB}_nucl_nodes.dmp"
# ln -fs "${OUTDB}_names.dmp" "${OUTDB}_nucl_names.dmp"
# ln -fs "${OUTDB}_merged.dmp" "${OUTDB}_nucl_merged.dmp"
# fi

if notExists "${OUTDB}_nucl_contig_to_set.index"; then
awk '{ print $1"\t"$3; }' "${OUTDB}_nucl.lookup" | sort -k1,1n -k2,2n > "${OUTDB}_nucl_contig_to_set.tsv"
# shellcheck disable=SC2086
Expand Down Expand Up @@ -169,16 +153,12 @@ if [ "$("${MMSEQS}" dbtype "${OUTDB}")" = "Nucleotide" ]; then
if notExists "${OUTDB}_set_mapping"; then
awk 'NR == FNR { f[$1] = $2; next } $2 in f { print $1"\t"f[$2] }' \
"${TAXMAPPING}" "${OUTDB}.source" > "${OUTDB}_set_mapping"
ln -sf "${OUTDB}_nucl_orf_names.dmp" "${OUTDB}_set_names.dmp"
ln -sf "${OUTDB}_nucl_orf_nodes.dmp" "${OUTDB}_set_nodes.dmp"
ln -sf "${OUTDB}_nucl_orf_merged.dmp" "${OUTDB}_set_merged.dmp"
ln -sf "${OUTDB}_nucl_orf_taxonomy" "${OUTDB}_set_taxonomy"
awk 'BEGIN { printf("%c%c%c%c",18,0,0,0); exit; }' > "${OUTDB}_set.dbtype"
fi

if notExists "${OUTDB}_nucl_mapping"; then
ln -sf "${OUTDB}_nucl_orf_names.dmp" "${OUTDB}_nucl_names.dmp"
ln -sf "${OUTDB}_nucl_orf_nodes.dmp" "${OUTDB}_nucl_nodes.dmp"
ln -sf "${OUTDB}_nucl_orf_merged.dmp" "${OUTDB}_nucl_merged.dmp"
ln -sf "${OUTDB}_nucl_orf_taxonomy" "${OUTDB}_nucl_taxonomy"

# shellcheck disable=SC2086
"${MMSEQS}" createtaxdb "${OUTDB}_nucl" "${TMP_PATH}" --tax-mapping-mode 1 ${CREATETAXDB_PAR} ${THREADS_PAR} \
Expand Down
6 changes: 3 additions & 3 deletions src/util/SummarizeResults.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -147,9 +147,9 @@ int summarizeresults(int argc, const char **argv, const Command& command) {
buffer.append(1, '\t');
buffer.append(SSTR(node->taxId));
buffer.append(1, '\t');
buffer.append(node->rank);
buffer.append(t->getString(node->rankIdx));
buffer.append(1, '\t');
buffer.append(node->name);
buffer.append(t->getString(node->nameIdx));
if (!ranks.empty()) {
buffer.append(1, '\t');
buffer.append(Util::implode(t->AtRanks(node, ranks), ';'));
Expand All @@ -164,7 +164,7 @@ int summarizeresults(int argc, const char **argv, const Command& command) {
}
}
}
buffer.append("\n");
buffer.append(1, '\n');
buffer.append(tmpBuffer);
}
tmpBuffer.clear();
Expand Down

0 comments on commit 74b273f

Please sign in to comment.