Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit 53045d7

Browse files
committedJan 24, 2025·
feat: improve error messages when bwa index files are not found
1 parent f60e919 commit 53045d7

File tree

2 files changed

+43
-14
lines changed

2 files changed

+43
-14
lines changed
 

‎pybwa/libbwaindex.pyx

+37-14
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
from pathlib import Path
44

55
from cpython cimport PyBytes_Check, PyUnicode_Check
6-
from pysam import AlignmentFile, FastxFile
6+
from pysam import AlignmentFile, FastxFile, samtools
77
import enum
88

99

@@ -96,22 +96,45 @@ cdef class BwaIndex:
9696
9797
# Build the sequence dictionary
9898
dict_fn = Path(prefix.with_suffix(".dict"))
99-
with FastxFile(filename=f"{fasta}") as reader, dict_fn.open("w") as writer:
100-
writer.write("@HD\tVN:1.5\tSO:unsorted\n")
101-
for rec in reader:
102-
writer.write(f"@SQ\tSN:{rec.name}\tLN:{len(rec.sequence)}\n")
99+
samtools.dict("-o", f"{dict_fn}", f"{fasta}")
103100
104101
105102
cdef _load_index(self, prefix, mode):
106-
prefix = bwa_idx_infer_prefix(force_bytes(prefix))
107-
if not prefix:
108-
raise Exception(f"Could not find the index at: {prefix}")
109-
110-
self._delegate = bwa_idx_load(prefix, mode)
111-
112-
# the SAM header from the sequence dictionary
113-
seq_dict = Path(prefix.decode("utf-8")).with_suffix(".dict")
114-
# TODO: error message when seq_dict is missing?
103+
# infer the prefix from the hint
104+
prefix_char_ptr = bwa_idx_infer_prefix(force_bytes(prefix))
105+
if not prefix_char_ptr:
106+
raise FileNotFoundError(f"could not locate the index [prefix]: {prefix}")
107+
108+
# the path to the inferred prefix
109+
prefix_path: Path = Path(prefix_char_ptr.decode("utf-8"))
110+
111+
# the path to the sequence dictionary
112+
seq_dict = prefix_path.with_suffix(".dict")
113+
114+
def assert_path_suffix_exists(suffix: str) -> None:
115+
new_path = prefix_path.with_suffix(prefix_path.suffix + suffix)
116+
if not new_path.exists():
117+
raise FileNotFoundError(f"could not locate the index file [{suffix}]: {new_path}")
118+
119+
# Check that all index files exist
120+
if mode & BWA_IDX_BWT == BWA_IDX_BWT:
121+
assert_path_suffix_exists(".bwt")
122+
assert_path_suffix_exists(".sa")
123+
if mode & BWA_IDX_BNS == BWA_IDX_BNS:
124+
assert_path_suffix_exists(".ann")
125+
assert_path_suffix_exists(".amb")
126+
assert_path_suffix_exists(".pac")
127+
if mode & BWA_IDX_PAC == BWA_IDX_PAC:
128+
assert_path_suffix_exists(".pac")
129+
if not seq_dict.exists():
130+
raise FileNotFoundError(
131+
f"could not locate the sequence dictionary [use `samtools dict`]: {seq_dict}"
132+
)
133+
134+
# load the index
135+
self._delegate = bwa_idx_load(prefix_char_ptr, mode)
136+
137+
# load the SAM header from the sequence dictionary
115138
with seq_dict.open("r") as fh:
116139
with AlignmentFile(fh) as reader:
117140
self.header = reader.header

‎tests/test_bwapy.py

+6
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,12 @@ def test_bwa_index_build(ref_fasta: Path, tmp_path_factory: pytest.TempPathFacto
2525

2626
# Build the index
2727
BwaIndex.index(fasta=ref_fasta, prefix=prefix)
28+
29+
# Check the files exist
30+
for suffix in [".bwt", ".sa", ".ann", ".pac"]:
31+
assert prefix.with_suffix(prefix.suffix + suffix).exists()
32+
assert prefix.with_suffix(".dict").exists()
33+
2834
# Load it
2935
BwaIndex(prefix=prefix)
3036

0 commit comments

Comments
 (0)
Please sign in to comment.