Skip to content

Commit

Permalink
Merge pull request #26 from marco-mariotti/migration_guide
Browse files Browse the repository at this point in the history
combine_interval_columns: added swap
  • Loading branch information
marco-mariotti authored May 7, 2024
2 parents 3b8957b + 9bc695f commit bedf5df
Show file tree
Hide file tree
Showing 3 changed files with 33 additions and 7 deletions.
2 changes: 1 addition & 1 deletion pyranges/core/names.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ def wrapper(*args, **kwargs) -> "PyRanges | pd.DataFrame | pd.Series":
VALID_JOIN_OPTIONS = [JOIN_INNER, JOIN_LEFT, JOIN_OUTER, JOIN_RIGHT]

JOIN_SUFFIX = "_b"
VALID_COMBINE_OPTIONS = Literal["intersect", "union"]
VALID_COMBINE_OPTIONS = Literal["intersect", "union", "swap"]

NEAREST_ANY_DIRECTION: Final = "any"
NEAREST_UPSTREAM: Final = "upstream"
Expand Down
26 changes: 23 additions & 3 deletions pyranges/core/pyranges_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -4261,12 +4261,13 @@ def combine_interval_columns(
) -> "pr.PyRanges":
"""Use two pairs of columns representing intervals to create a new start and end column.
The function is designed as post-processing after join_ranges to aggregate the coordinates of the two intervals.
By default, the new start and end columns will be the intersection of the intervals.
Parameters
----------
function : {"intersect", "union"} or Callable, default "intersect"
How to combine the intervals: "intersect" or "union".
function : {"intersect", "union", "swap"} or Callable, default "intersect"
How to combine the self and other intervals: "intersect", "union", or "swap"
If a callable is passed, it should take four Series arguments: start1, end1, start2, end2;
and return a tuple of two integers: (new_starts, new_ends).
Expand Down Expand Up @@ -4325,6 +4326,19 @@ def combine_interval_columns(
PyRanges with 5 rows, 4 columns, and 1 index columns (with 2 index duplicates).
Contains 1 chromosomes and 2 strands.
>>> j.combine_interval_columns('swap')
index | Chromosome Start End Strand
int64 | category int64 int64 category
------- --- ------------ ------- ------- ----------
1 | chr1 10073 10272 +
0 | chr1 9988 10187 -
0 | chr1 10079 10278 -
2 | chr1 9988 10187 -
2 | chr1 10079 10278 -
PyRanges with 5 rows, 4 columns, and 1 index columns (with 2 index duplicates).
Contains 1 chromosomes and 2 strands.
Use a custom function that keeps the start of the first interval and the end of the second:
>>> def custom_combine(s1, e1, s2, e2): return (s1, e2)
Expand All @@ -4341,12 +4355,18 @@ def combine_interval_columns(
Contains 1 chromosomes and 2 strands.
"""
from pyranges.methods.combine_positions import _intersect_interval_columns, _union_interval_columns
from pyranges.methods.combine_positions import (
_intersect_interval_columns,
_swap_interval_columns,
_union_interval_columns,
)

if function == "intersect":
function = _intersect_interval_columns
elif function == "union":
function = _union_interval_columns
elif function == "swap":
function = _swap_interval_columns

new_starts, new_ends = function(self[start], self[end], self[start2], self[end2])

Expand Down
12 changes: 9 additions & 3 deletions pyranges/methods/combine_positions.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@ def _intersect_interval_columns(
np.where(starts > starts2.to_numpy(), starts, starts2),
index=starts.index,
)

new_ends = pd.Series(
np.where(ends < ends2.to_numpy(), ends, ends2),
index=ends.index,
Expand All @@ -30,10 +29,17 @@ def _union_interval_columns(
np.where(starts < starts2.to_numpy(), starts, starts2),
index=starts.index,
)

new_ends = pd.Series(
np.where(ends > ends2.to_numpy(), ends, ends2),
index=ends.index,
)

return new_starts, new_ends


def _swap_interval_columns(
starts: pd.Series, # noqa: ARG001
ends: pd.Series, # noqa: ARG001
starts2: pd.Series,
ends2: pd.Series,
) -> tuple[pd.Series, pd.Series]:
return starts2, ends2

0 comments on commit bedf5df

Please sign in to comment.