Skip to content

Commit

Permalink
feat: add chrom1/chrom2 arguments to add_pair
Browse files Browse the repository at this point in the history
  • Loading branch information
msto committed Oct 17, 2023
1 parent d2d50c4 commit e5bf00f
Show file tree
Hide file tree
Showing 6 changed files with 56 additions and 38 deletions.
11 changes: 7 additions & 4 deletions fgpyo/sam/builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -340,7 +340,8 @@ def add_pair(
bases2: Optional[str] = None,
quals1: Optional[List[int]] = None,
quals2: Optional[List[int]] = None,
chrom: str = sam.NO_REF_NAME,
chrom1: str = sam.NO_REF_NAME,
chrom2: Optional[str] = None,
start1: int = sam.NO_REF_POS,
start2: int = sam.NO_REF_POS,
cigar1: Optional[str] = None,
Expand Down Expand Up @@ -379,7 +380,8 @@ def add_pair(
bases2: The bases for R2. If None is given a random sequence is generated.
quals1: The list of int qualities for R1. If None, the default base quality is used.
quals2: The list of int qualities for R2. If None, the default base quality is used.
chrom: The chromosome to which both reads are mapped. Defaults to the unmapped value.
chrom1: The chromosome to which R1 is mapped. Defaults to the unmapped value.
chrom2: The chromosome to which R2 is mapped. If None, `chrom1` is used.
start1: The start position of R1. Defaults to the unmapped value.
start2: The start position of R2. Defaults to the unmapped value.
cigar1: The cigar string for R1. Defaults to None for unmapped reads, otherwise all M.
Expand All @@ -404,16 +406,17 @@ def add_pair(
raise ValueError(f"Invalid value for strand2: {strand2}")

name = name if name is not None else self._next_name()
chrom2 = chrom2 if chrom2 is not None else chrom1

# Setup R1
r1 = self._new_rec(name=name, chrom=chrom, start=start1, mapq=mapq1, attrs=attrs)
r1 = self._new_rec(name=name, chrom=chrom1, start=start1, mapq=mapq1, attrs=attrs)
self._set_flags(r1, read_num=1, strand=strand1)
self._set_length_dependent_fields(
rec=r1, length=self.r1_len, bases=bases1, quals=quals1, cigar=cigar1
)

# Setup R2
r2 = self._new_rec(name=name, chrom=chrom, start=start2, mapq=mapq2, attrs=attrs)
r2 = self._new_rec(name=name, chrom=chrom2, start=start2, mapq=mapq2, attrs=attrs)
self._set_flags(r2, read_num=2, strand=strand2)
self._set_length_dependent_fields(
rec=r2, length=self.r2_len, bases=bases2, quals=quals2, cigar=cigar2
Expand Down
69 changes: 42 additions & 27 deletions fgpyo/sam/tests/test_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ def test_add_pair_all_fields() -> None:
builder = SamBuilder()
builder.add_pair(
name="q1",
chrom="chr1",
chrom1="chr1",
bases1="ACGTG",
quals1=[20, 21, 22, 23, 24],
start1=10000,
Expand Down Expand Up @@ -52,7 +52,7 @@ def test_add_pair_all_fields() -> None:

def test_add_pair_minimal() -> None:
builder = SamBuilder(r1_len=10, r2_len=5, base_quality=25, mapping_quality=20)
r1, r2 = builder.add_pair(chrom="chr1", start1=1000, start2=1200)
r1, r2 = builder.add_pair(chrom1="chr1", start1=1000, start2=1200)
assert r1.query_name == r2.query_name
assert r1.reference_name == r2.reference_name == "chr1"
assert r1.reference_start == 1000
Expand All @@ -71,20 +71,22 @@ def test_add_pair_minimal() -> None:

def test_add_pair_mix_and_match() -> None:
builder = SamBuilder(r1_len=100, r2_len=100, base_quality=30)
r1, r2 = builder.add_pair(chrom="chr1", start1=500, start2=700, cigar1="75M", cigar2="9M1I30M")
r1, r2 = builder.add_pair(
chrom1="chr1", start1=500, start2=700, cigar1="75M", cigar2="9M1I30M"
)
assert len(r1.query_sequence) == len(r1.query_qualities) == 75
assert len(r2.query_sequence) == len(r2.query_qualities) == 40

r1, r2 = builder.add_pair(
chrom="chr1", start1=500, start2=700, bases1="ACGTGCATGC", bases2="ACGAC"
chrom1="chr1", start1=500, start2=700, bases1="ACGTGCATGC", bases2="ACGAC"
)
assert len(r1.query_sequence) == len(r1.query_qualities) == 10
assert len(r2.query_sequence) == len(r2.query_qualities) == 5
assert r1.cigarstring == "10M"
assert r2.cigarstring == "5M"

r1, r2 = builder.add_pair(
chrom="chr1", start1=500, start2=700, quals1=[30] * 20, quals2=[20] * 10
chrom1="chr1", start1=500, start2=700, quals1=[30] * 20, quals2=[20] * 10
)
assert len(r1.query_sequence) == len(r1.query_qualities) == 20
assert len(r2.query_sequence) == len(r2.query_qualities) == 10
Expand All @@ -93,18 +95,18 @@ def test_add_pair_mix_and_match() -> None:

# Now what if we provide multiple values that are inconsistent
with pytest.raises(ValueError, match="not length compatible"):
builder.add_pair(chrom="chr1", start1=10, start2=99, bases1="ACGTG", cigar1="10M")
builder.add_pair(chrom1="chr1", start1=10, start2=99, bases1="ACGTG", cigar1="10M")

with pytest.raises(ValueError, match="not length compatible"):
builder.add_pair(chrom="chr1", start1=10, start2=99, bases1="ACGTG", quals1=[2, 2])
builder.add_pair(chrom1="chr1", start1=10, start2=99, bases1="ACGTG", quals1=[2, 2])

with pytest.raises(ValueError, match="not length compatible"):
builder.add_pair(chrom="chr1", start1=10, start2=99, quals1=[2, 2], cigar1="5M")
builder.add_pair(chrom1="chr1", start1=10, start2=99, quals1=[2, 2], cigar1="5M")


def test_unmapped_reads() -> None:
builder = SamBuilder()
r1, r2 = builder.add_pair(chrom="chr1", start1=1000)
r1, r2 = builder.add_pair(chrom1="chr1", start1=1000)
assert not r1.is_unmapped
assert r1.mate_is_unmapped
assert r2.is_unmapped
Expand All @@ -115,7 +117,7 @@ def test_unmapped_reads() -> None:
assert rec.next_reference_name == "chr1"
assert rec.next_reference_start == 1000

r1, r2 = builder.add_pair(chrom="chr1", start2=2000)
r1, r2 = builder.add_pair(chrom1="chr1", start2=2000)
assert r1.is_unmapped
assert not r1.mate_is_unmapped
assert not r2.is_unmapped
Expand All @@ -126,7 +128,7 @@ def test_unmapped_reads() -> None:
assert rec.next_reference_name == "chr1"
assert rec.next_reference_start == 2000

r1, r2 = builder.add_pair(chrom=sam.NO_REF_NAME)
r1, r2 = builder.add_pair(chrom1=sam.NO_REF_NAME)
assert r1.is_unmapped
assert r1.mate_is_unmapped
assert r2.is_unmapped
Expand All @@ -140,33 +142,46 @@ def test_unmapped_reads() -> None:

def test_invalid_strand() -> None:
with pytest.raises(ValueError, match="strand"):
SamBuilder().add_pair(chrom="chr1", start1=100, start2=200, strand1="F", strand2="R")
SamBuilder().add_pair(chrom1="chr1", start1=100, start2=200, strand1="F", strand2="R")


def test_proper_pair() -> None:
builder = SamBuilder()

# Regular innies
for rec in builder.add_pair(chrom="chr1", start1=5000, start2=5200, strand1="+", strand2="-"):
for rec in builder.add_pair(chrom1="chr1", start1=5000, start2=5200, strand1="+", strand2="-"):
assert rec.is_proper_pair
for rec in builder.add_pair(chrom="chr1", start1=5200, start2=5000, strand1="-", strand2="+"):
for rec in builder.add_pair(chrom1="chr1", start1=5200, start2=5000, strand1="-", strand2="+"):
assert rec.is_proper_pair

# Outies
for rec in builder.add_pair(chrom="chr1", start1=5000, start2=5200, strand1="-", strand2="+"):
for rec in builder.add_pair(chrom1="chr1", start1=5000, start2=5200, strand1="-", strand2="+"):
assert not rec.is_proper_pair
for rec in builder.add_pair(chrom="chr1", start1=5200, start2=5000, strand1="+", strand2="-"):
for rec in builder.add_pair(chrom1="chr1", start1=5200, start2=5000, strand1="+", strand2="-"):
assert not rec.is_proper_pair

# Unmapped
for rec in builder.add_pair(chrom="chr1", start1=5000, strand1="+"):
for rec in builder.add_pair(chrom1="chr1", start1=5000, strand1="+"):
assert not rec.is_proper_pair
for rec in builder.add_pair(chrom="chr1", start2=5000, strand2="+"):
for rec in builder.add_pair(chrom1="chr1", start2=5000, strand2="+"):
assert not rec.is_proper_pair
for rec in builder.add_pair():
assert not rec.is_proper_pair


def test_discordant_pair() -> None:
builder = SamBuilder()
r1, r2 = builder.add_pair(chrom1="chr1", start1=1000, chrom2="chr2", start2=1000)

assert r1.query_name == r2.query_name
assert r1.reference_name == "chr1"
assert r2.reference_name == "chr2"
assert r1.reference_start == 1000
assert r2.reference_start == 1000
assert not r1.is_reverse
assert r2.is_reverse


def test_add_single() -> None:
builder = SamBuilder(r1_len=25, r2_len=50)

Expand Down Expand Up @@ -199,10 +214,10 @@ def test_add_single() -> None:

def test_sorting() -> None:
builder = SamBuilder()
builder.add_pair(chrom="chr1", start1=5000, start2=4700, strand1="-", strand2="+")
builder.add_pair(chrom="chr1", start1=4000, start2=4300)
builder.add_pair(chrom="chr5", start1=4000, start2=4300)
builder.add_pair(chrom="chr2", start1=4000, start2=4300)
builder.add_pair(chrom1="chr1", start1=5000, start2=4700, strand1="-", strand2="+")
builder.add_pair(chrom1="chr1", start1=4000, start2=4300)
builder.add_pair(chrom1="chr5", start1=4000, start2=4300)
builder.add_pair(chrom1="chr2", start1=4000, start2=4300)

last_ref_id = -1
last_start = -1
Expand All @@ -217,17 +232,17 @@ def test_sorting() -> None:
def test_custom_sd() -> None:
builder1 = SamBuilder()
builder2 = SamBuilder(sd=[{"SN": "hi", "LN": 999}, {"SN": "bye", "LN": 888}])
builder1.add_pair(chrom="chr1", start1=200, start2=400)
builder2.add_pair(chrom="hi", start1=200, start2=400)
builder1.add_pair(chrom1="chr1", start1=200, start2=400)
builder2.add_pair(chrom1="hi", start1=200, start2=400)

with pytest.raises(ValueError, match="not a valid chromosome name"):
builder1.add_pair(chrom="hi", start1=200, start2=400)
builder1.add_pair(chrom1="hi", start1=200, start2=400)

with pytest.raises(ValueError, match="not a valid chromosome name"):
builder2.add_pair(chrom="chr1", start1=200, start2=400)
builder2.add_pair(chrom1="chr1", start1=200, start2=400)


def test_custom_rg() -> None:
builder = SamBuilder(rg={"ID": "novel", "SM": "custom_rg", "LB": "foo", "PL": "ILLUMINA"})
for rec in builder.add_pair(chrom="chr1", start1=100, start2=200):
for rec in builder.add_pair(chrom1="chr1", start1=100, start2=200):
assert rec.get_tag("RG") == "novel"
4 changes: 2 additions & 2 deletions fgpyo/sam/tests/test_clipping.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,15 +14,15 @@ def r(start: Optional[int], cigar: Optional[str], strand: Optional[str] = "+") -
"""Constructs a read for testing."""
builder = SamBuilder()
if start:
r1, r2 = builder.add_pair(chrom="chr1", start1=start, cigar1=cigar, strand1=strand)
r1, r2 = builder.add_pair(chrom1="chr1", start1=start, cigar1=cigar, strand1=strand)
else:
r1, r2 = builder.add_pair()
return r1


def test_make_read_unmapped() -> None:
builder = SamBuilder()
r1, r2 = builder.add_pair(chrom="chr1", start1=100, start2=250)
r1, r2 = builder.add_pair(chrom1="chr1", start1=100, start2=250)

clipping._make_read_unmapped(r1)
assert r1.is_unmapped
Expand Down
2 changes: 1 addition & 1 deletion fgpyo/sam/tests/test_sam.py
Original file line number Diff line number Diff line change
Expand Up @@ -288,7 +288,7 @@ def test_is_clipping() -> None:

def test_isize() -> None:
builder = SamBuilder()
r1, r2 = builder.add_pair(chrom="chr1", start1=100, cigar1="115M", start2=250, cigar2="40M")
r1, r2 = builder.add_pair(chrom1="chr1", start1=100, cigar1="115M", start2=250, cigar2="40M")
assert sam.isize(r1, r2) == 190
assert sam.isize(r2, r1) == -190

Expand Down
4 changes: 2 additions & 2 deletions fgpyo/sam/tests/test_template_iterator.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

def test_template_init_function() -> None:
builder: SamBuilder = SamBuilder()
builder.add_pair(name="x", chrom="chr1", start1=1, start2=2)
builder.add_pair(name="x", chrom1="chr1", start1=1, start2=2)
r1 = builder.to_sorted_list()[0]
r2 = builder.to_sorted_list()[1]
template = Template(
Expand All @@ -26,7 +26,7 @@ def test_to_templates() -> None:
builder = SamBuilder()

# Series of alignments for one template
builder.add_pair(name="q1", chrom="chr1", start1=1, start2=2)
builder.add_pair(name="q1", chrom1="chr1", start1=1, start2=2)
builder.add_single(name="q1", read_num=1, chrom="chr1", start=1, supplementary=True)
builder.add_single(name="q1", read_num=1, chrom="chr1", start=11, supplementary=True)
builder.add_single(name="q1", read_num=2, chrom="chr1", start=2, supplementary=True)
Expand Down
4 changes: 2 additions & 2 deletions fgpyo/util/tests/test_logging.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,8 @@ def test_progress_logger_as_context_manager() -> None:


builder = SamBuilder()
r1_mapped_named, r2_unmapped_named = builder.add_pair(chrom="chr1", start1=1000)
r1_unmapped_un_named, r2_unmapped_un_named = builder.add_pair(chrom=sam.NO_REF_NAME)
r1_mapped_named, r2_unmapped_named = builder.add_pair(chrom1="chr1", start1=1000)
r1_unmapped_un_named, r2_unmapped_un_named = builder.add_pair(chrom1=sam.NO_REF_NAME)


@pytest.mark.parametrize(
Expand Down

0 comments on commit e5bf00f

Please sign in to comment.