Skip to content

Commit

Permalink
Changed chromosome regular expression for nematode Roman numerals
Browse files (browse the repository at this point in the history)
  • Loading branch information
jgrg committed Jul 18, 2024
1 parent 26b0a94 commit 601d2b8
Showing 1 changed file with 4 additions and 5 deletions.
9 changes: 4 additions & 5 deletions src/tola/assembly/build_utils.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -38,16 +38,15 @@ def make_chr_name(self, scaffold: Scaffold) -> None:
for tag in scaffold.fragment_tags():
if tag == "Painted":
is_painted = True
elif m := re.match(r"[A-Z]\d*$", tag):
elif re.fullmatch(r"([A-Z]\d*|[IVX_]+)", tag):
# This tag looks like a chromosome name
cn = m.group(0)
if chr_name and cn != chr_name:
if chr_name and tag != chr_name:
msg = (
f"Found more than one chr_name name: '{chr_name}'"
f" and '{cn}' in scaffold:\n\n{scaffold}"
f" and '{tag}' in scaffold:\n\n{scaffold}"
)
raise ValueError(msg)
chr_name = cn
chr_name = tag
# Keep chromosome numbering in sync with Pretext scaffolds:
self.chr_name_n += 1
elif tag not in ("Contaminant", "Cut", "Haplotig", "Unloc"):
Expand Down

0 comments on commit 601d2b8

Please sign in to comment.