|
3 | 3 | from pathlib import Path
|
4 | 4 |
|
5 | 5 | from cpython cimport PyBytes_Check, PyUnicode_Check
|
6 |
| -from pysam import AlignmentFile, FastxFile |
| 6 | +from pysam import AlignmentFile, FastxFile, samtools |
7 | 7 | import enum
|
8 | 8 |
|
9 | 9 |
|
@@ -96,22 +96,45 @@ cdef class BwaIndex:
|
96 | 96 |
|
97 | 97 | # Build the sequence dictionary
|
98 | 98 | dict_fn = Path(prefix.with_suffix(".dict"))
|
99 |
| - with FastxFile(filename=f"{fasta}") as reader, dict_fn.open("w") as writer: |
100 |
| - writer.write("@HD\tVN:1.5\tSO:unsorted\n") |
101 |
| - for rec in reader: |
102 |
| - writer.write(f"@SQ\tSN:{rec.name}\tLN:{len(rec.sequence)}\n") |
| 99 | + samtools.dict("-o", f"{dict_fn}", f"{fasta}") |
103 | 100 |
|
104 | 101 |
|
105 | 102 | cdef _load_index(self, prefix, mode):
|
106 |
| - prefix = bwa_idx_infer_prefix(force_bytes(prefix)) |
107 |
| - if not prefix: |
108 |
| - raise Exception(f"Could not find the index at: {prefix}") |
109 |
| -
|
110 |
| - self._delegate = bwa_idx_load(prefix, mode) |
111 |
| -
|
112 |
| - # the SAM header from the sequence dictionary |
113 |
| - seq_dict = Path(prefix.decode("utf-8")).with_suffix(".dict") |
114 |
| - # TODO: error message when seq_dict is missing? |
| 103 | + # infer the prefix from the hint |
| 104 | + prefix_char_ptr = bwa_idx_infer_prefix(force_bytes(prefix)) |
| 105 | + if not prefix_char_ptr: |
| 106 | + raise FileNotFoundError(f"could not locate the index [prefix]: {prefix}") |
| 107 | +
|
| 108 | + # the path to the inferred prefix |
| 109 | + prefix_path: Path = Path(prefix_char_ptr.decode("utf-8")) |
| 110 | +
|
| 111 | + # the path to the sequence dictionary |
| 112 | + seq_dict = prefix_path.with_suffix(".dict") |
| 113 | +
|
| 114 | + def assert_path_suffix_exists(suffix: str) -> None: |
| 115 | + new_path = prefix_path.with_suffix(prefix_path.suffix + suffix) |
| 116 | + if not new_path.exists(): |
| 117 | + raise FileNotFoundError(f"could not locate the index file [{suffix}]: {new_path}") |
| 118 | +
|
| 119 | + # Check that all index files exist |
| 120 | + if mode & BWA_IDX_BWT == BWA_IDX_BWT: |
| 121 | + assert_path_suffix_exists(".bwt") |
| 122 | + assert_path_suffix_exists(".sa") |
| 123 | + if mode & BWA_IDX_BNS == BWA_IDX_BNS: |
| 124 | + assert_path_suffix_exists(".ann") |
| 125 | + assert_path_suffix_exists(".amb") |
| 126 | + assert_path_suffix_exists(".pac") |
| 127 | + if mode & BWA_IDX_PAC == BWA_IDX_PAC: |
| 128 | + assert_path_suffix_exists(".pac") |
| 129 | + if not seq_dict.exists(): |
| 130 | + raise FileNotFoundError( |
| 131 | + f"could not locate the sequence dictionary [use `samtools dict`]: {seq_dict}" |
| 132 | + ) |
| 133 | +
|
| 134 | + # load the index |
| 135 | + self._delegate = bwa_idx_load(prefix_char_ptr, mode) |
| 136 | +
|
| 137 | + # load the SAM header from the sequence dictionary |
115 | 138 | with seq_dict.open("r") as fh:
|
116 | 139 | with AlignmentFile(fh) as reader:
|
117 | 140 | self.header = reader.header
|
|
0 commit comments