Merge pull request #26 from marco-mariotti/migration_guide

combine_interval_columns: added swap
pyranges · May 7, 2024 · bedf5df · bedf5df
2 parents 3b8957b + 9bc695f
commit bedf5df
Show file tree

Hide file tree

Showing 3 changed files with 33 additions and 7 deletions.
diff --git a/pyranges/core/names.py b/pyranges/core/names.py
@@ -84,7 +84,7 @@ def wrapper(*args, **kwargs) -> "PyRanges | pd.DataFrame | pd.Series":
 VALID_JOIN_OPTIONS = [JOIN_INNER, JOIN_LEFT, JOIN_OUTER, JOIN_RIGHT]
 
 JOIN_SUFFIX = "_b"
-VALID_COMBINE_OPTIONS = Literal["intersect", "union"]
+VALID_COMBINE_OPTIONS = Literal["intersect", "union", "swap"]
 
 NEAREST_ANY_DIRECTION: Final = "any"
 NEAREST_UPSTREAM: Final = "upstream"

diff --git a/pyranges/core/pyranges_main.py b/pyranges/core/pyranges_main.py
@@ -4261,12 +4261,13 @@ def combine_interval_columns(
     ) -> "pr.PyRanges":
         """Use two pairs of columns representing intervals to create a new start and end column.
 
+        The function is designed as post-processing after join_ranges to aggregate the coordinates of the two intervals.
         By default, the new start and end columns will be the intersection of the intervals.
 
         Parameters
         ----------
-        function : {"intersect", "union"} or Callable, default "intersect"
-            How to combine the intervals: "intersect" or "union".
+        function : {"intersect", "union", "swap"} or Callable, default "intersect"
+            How to combine the self and other intervals: "intersect", "union", or "swap" (keep the other interval).
             If a callable is passed, it should take four Series arguments: start1, end1, start2, end2;
             and return a tuple of two Series: (new_starts, new_ends).
 
@@ -4325,6 +4326,19 @@ def combine_interval_columns(
         PyRanges with 5 rows, 4 columns, and 1 index columns (with 2 index duplicates).
         Contains 1 chromosomes and 2 strands.
 
+        >>> j.combine_interval_columns('swap')
+          index  |    Chromosome      Start      End  Strand
+          int64  |    category        int64    int64  category
+        -------  ---  ------------  -------  -------  ----------
+              1  |    chr1            10073    10272  +
+              0  |    chr1             9988    10187  -
+              0  |    chr1            10079    10278  -
+              2  |    chr1             9988    10187  -
+              2  |    chr1            10079    10278  -
+        PyRanges with 5 rows, 4 columns, and 1 index columns (with 2 index duplicates).
+        Contains 1 chromosomes and 2 strands.
+
+
         Use a custom function that keeps the start of the first interval and the end of the second:
 
         >>> def custom_combine(s1, e1, s2, e2): return (s1, e2)
@@ -4341,12 +4355,18 @@ def combine_interval_columns(
         Contains 1 chromosomes and 2 strands.
 
         """
-        from pyranges.methods.combine_positions import _intersect_interval_columns, _union_interval_columns
+        from pyranges.methods.combine_positions import (
+            _intersect_interval_columns,
+            _swap_interval_columns,
+            _union_interval_columns,
+        )
 
         if function == "intersect":
             function = _intersect_interval_columns
         elif function == "union":
             function = _union_interval_columns
+        elif function == "swap":
+            function = _swap_interval_columns
 
         new_starts, new_ends = function(self[start], self[end], self[start2], self[end2])
 

diff --git a/pyranges/methods/combine_positions.py b/pyranges/methods/combine_positions.py
@@ -12,7 +12,6 @@ def _intersect_interval_columns(
         np.where(starts > starts2.to_numpy(), starts, starts2),
         index=starts.index,
     )
-
     new_ends = pd.Series(
         np.where(ends < ends2.to_numpy(), ends, ends2),
         index=ends.index,
@@ -30,10 +29,17 @@ def _union_interval_columns(
         np.where(starts < starts2.to_numpy(), starts, starts2),
         index=starts.index,
     )
-
     new_ends = pd.Series(
         np.where(ends > ends2.to_numpy(), ends, ends2),
         index=ends.index,
     )
-
     return new_starts, new_ends
+
+
+def _swap_interval_columns(
+    starts: pd.Series,  # noqa: ARG001
+    ends: pd.Series,  # noqa: ARG001
+    starts2: pd.Series,
+    ends2: pd.Series,
+) -> tuple[pd.Series, pd.Series]:
+    return starts2, ends2  # "swap": ignore the first interval and return the second one's columns as-is