Skip to content

Commit

Permalink
Merge pull request #38 from marco-mariotti/master
Browse files Browse the repository at this point in the history
fix workflows
  • Loading branch information
marco-mariotti authored Jul 25, 2024
2 parents 57f7009 + 9195353 commit ca4e497
Show file tree
Hide file tree
Showing 5 changed files with 75 additions and 72 deletions.
8 changes: 4 additions & 4 deletions docs/how_to_overlap.rst
Original file line number Diff line number Diff line change
Expand Up @@ -529,7 +529,7 @@ are modified to return only the actual overlaps:
Method :func:`subtract_ranges <pyranges.PyRanges.subtract_ranges>` returns the portions of intervals in self
that do not overlap any interval in other:

>>> a2.subtract_ranges(b)
>>> a2.subtract_ranges(b).sort_values('Start')
index | Chromosome Start End Strand odd
int64 | object int64 int64 object int64
------- --- ------------ ------- ------- -------- -------
Expand All @@ -538,13 +538,13 @@ that do not overlap any interval in other:
2 | chr1 18 21 - 0
3 | chr1 23 25 - 1
5 | chr1 32 34 + 0
5 | chr1 36 37 + 0
6 | chr1 33 34 + 1
5 | chr1 36 37 + 0
PyRanges with 7 rows, 5 columns, and 1 index columns (with 1 index duplicates).
Contains 1 chromosomes and 2 strands.


>>> a2.subtract_ranges(b, strand_behavior='ignore')
>>> a2.subtract_ranges(b, strand_behavior='ignore').sort_values('Start')
index | Chromosome Start End Strand odd
int64 | object int64 int64 object int64
------- --- ------------ ------- ------- -------- -------
Expand All @@ -554,8 +554,8 @@ that do not overlap any interval in other:
2 | chr1 20 21 - 0
3 | chr1 23 25 - 1
5 | chr1 32 34 + 0
5 | chr1 36 37 + 0
6 | chr1 33 34 + 1
5 | chr1 36 37 + 0
PyRanges with 8 rows, 5 columns, and 1 index columns (with 2 index duplicates).
Contains 1 chromosomes and 2 strands.

Expand Down
119 changes: 61 additions & 58 deletions docs/how_to_rows.rst
Original file line number Diff line number Diff line change
Expand Up @@ -262,22 +262,22 @@ Sorting PyRanges
PyRanges objects can be sorted (i.e. altering the order of rows) by calling the pandas dataframe method ``sort_values``,
or the PyRanges method :func:`sort_ranges <pyranges.PyRanges.sort_ranges>`.

>>> import random; random.seed(123)
>>> import random; random.seed(1)
>>> c = pr.example_data.chipseq.remove_nonloc_columns()
>>> c['peak'] = [random.randint(0, 100) for _ in range(len(c))] # add a column with random values
>>> c['peak'] = [random.randint(0, 1000) for _ in range(len(c))] # add a column with random values
>>> c
index | Chromosome Start End Strand peak
int64 | category int64 int64 category int64
------- --- ------------ --------- --------- ---------- -------
0 | chr8 28510032 28510057 - 6
1 | chr7 107153363 107153388 - 34
2 | chr5 135821802 135821827 - 11
3 | chr14 19418999 19419024 - 98
0 | chr8 28510032 28510057 - 137
1 | chr7 107153363 107153388 - 582
2 | chr5 135821802 135821827 - 867
3 | chr14 19418999 19419024 - 821
... | ... ... ... ... ...
16 | chr9 120803448 120803473 + 43
17 | chr6 89296757 89296782 - 71
18 | chr1 194245558 194245583 + 42
19 | chr8 57916061 57916086 + 89
16 | chr9 120803448 120803473 + 96
17 | chr6 89296757 89296782 - 499
18 | chr1 194245558 194245583 + 29
19 | chr8 57916061 57916086 + 914
PyRanges with 20 rows, 5 columns, and 1 index columns.
Contains 15 chromosomes and 2 strands.

Expand All @@ -289,18 +289,19 @@ For example, let's sort by column ``peak``:
index | Chromosome Start End Strand peak
int64 | category int64 int64 category int64
------- --- ------------ --------- --------- ---------- -------
3 | chr14 19418999 19419024 - 98
19 | chr8 57916061 57916086 + 89
17 | chr6 89296757 89296782 - 71
10 | chr4 98488749 98488774 + 71
19 | chr8 57916061 57916086 + 914
2 | chr5 135821802 135821827 - 867
3 | chr14 19418999 19419024 - 821
14 | chr2 152562484 152562509 - 807
... | ... ... ... ... ...
2 | chr5 135821802 135821827 - 11
13 | chr1 80668132 80668157 - 6
0 | chr8 28510032 28510057 - 6
7 | chr19 19571102 19571127 + 4
7 | chr19 19571102 19571127 + 120
16 | chr9 120803448 120803473 + 96
5 | chr21 40099618 40099643 + 64
18 | chr1 194245558 194245583 + 29
PyRanges with 20 rows, 5 columns, and 1 index columns.
Contains 15 chromosomes and 2 strands.


PyRanges :func:`sort_ranges <pyranges.PyRanges.sort_ranges>` is designed for genomic ranges.
By default, it sorts by Chromosome, Strand, then interval coordinates. If Strands are valid (
see :func:`strand_valid <pyranges.PyRanges.strand_valid>`), then intervals on the reverse strand are
Expand All @@ -310,33 +311,34 @@ sorted in reverse order:
index | Chromosome Start End Strand peak
int64 | category int64 int64 category int64
------- --- ------------ --------- --------- ---------- -------
12 | chr1 38457520 38457545 + 43
18 | chr1 194245558 194245583 + 42
13 | chr1 80668132 80668157 - 6
9 | chr10 35419784 35419809 - 68
12 | chr1 38457520 38457545 + 667
18 | chr1 194245558 194245583 + 29
13 | chr1 80668132 80668157 - 388
9 | chr10 35419784 35419809 - 779
... | ... ... ... ... ...
19 | chr8 57916061 57916086 + 89
0 | chr8 28510032 28510057 - 6
6 | chr8 22714402 22714427 - 13
16 | chr9 120803448 120803473 + 43
19 | chr8 57916061 57916086 + 914
0 | chr8 28510032 28510057 - 137
6 | chr8 22714402 22714427 - 261
16 | chr9 120803448 120803473 + 96
PyRanges with 20 rows, 5 columns, and 1 index columns.
Contains 15 chromosomes and 2 strands.


Above, ``chr10`` appears before ``chr8`` because that is what string sorting does. We can force 'natural sorting':

>>> c.sort_ranges(natsorting=True)
index | Chromosome Start End Strand peak
int64 | category int64 int64 category int64
------- --- ------------ --------- --------- ---------- -------
12 | chr1 38457520 38457545 + 43
18 | chr1 194245558 194245583 + 42
13 | chr1 80668132 80668157 - 6
14 | chr2 152562484 152562509 - 20
12 | chr1 38457520 38457545 + 667
18 | chr1 194245558 194245583 + 29
13 | chr1 80668132 80668157 - 388
14 | chr2 152562484 152562509 - 807
... | ... ... ... ... ...
4 | chr12 106679761 106679786 - 52
3 | chr14 19418999 19419024 - 98
7 | chr19 19571102 19571127 + 4
5 | chr21 40099618 40099643 + 34
4 | chr12 106679761 106679786 - 782
3 | chr14 19418999 19419024 - 821
7 | chr19 19571102 19571127 + 120
5 | chr21 40099618 40099643 + 64
PyRanges with 20 rows, 5 columns, and 1 index columns.
Contains 15 chromosomes and 2 strands.

Expand All @@ -347,34 +349,35 @@ coordinates:
index | Chromosome Start End Strand peak
int64 | category int64 int64 category int64
------- --- ------------ --------- --------- ---------- -------
18 | chr1 194245558 194245583 + 42
12 | chr1 38457520 38457545 + 43
13 | chr1 80668132 80668157 - 6
9 | chr10 35419784 35419809 - 68
18 | chr1 194245558 194245583 + 29
12 | chr1 38457520 38457545 + 667
13 | chr1 80668132 80668157 - 388
9 | chr10 35419784 35419809 - 779
... | ... ... ... ... ...
19 | chr8 57916061 57916086 + 89
0 | chr8 28510032 28510057 - 6
6 | chr8 22714402 22714427 - 13
16 | chr9 120803448 120803473 + 43
19 | chr8 57916061 57916086 + 914
0 | chr8 28510032 28510057 - 137
6 | chr8 22714402 22714427 - 261
16 | chr9 120803448 120803473 + 96
PyRanges with 20 rows, 5 columns, and 1 index columns.
Contains 15 chromosomes and 2 strands.


Note that, above, each block defined by Chromosome and Strand is sorted by ``peak`` in ascending order.
Let's sort by descending order:

>>> c.sort_ranges('peak', sort_descending='peak')
index | Chromosome Start End Strand peak
int64 | category int64 int64 category int64
------- --- ------------ --------- --------- ---------- -------
12 | chr1 38457520 38457545 + 43
18 | chr1 194245558 194245583 + 42
13 | chr1 80668132 80668157 - 6
9 | chr10 35419784 35419809 - 68
12 | chr1 38457520 38457545 + 667
18 | chr1 194245558 194245583 + 29
13 | chr1 80668132 80668157 - 388
9 | chr10 35419784 35419809 - 779
... | ... ... ... ... ...
19 | chr8 57916061 57916086 + 89
6 | chr8 22714402 22714427 - 13
0 | chr8 28510032 28510057 - 6
16 | chr9 120803448 120803473 + 43
19 | chr8 57916061 57916086 + 914
6 | chr8 22714402 22714427 - 261
0 | chr8 28510032 28510057 - 137
16 | chr9 120803448 120803473 + 96
PyRanges with 20 rows, 5 columns, and 1 index columns.
Contains 15 chromosomes and 2 strands.

Expand All @@ -384,15 +387,15 @@ To use a different prioritization of genomic location columns, specify them in the
index | Chromosome Start End Strand peak
int64 | category int64 int64 category int64
------- --- ------------ --------- --------- ---------- -------
3 | chr14 19418999 19419024 - 98
19 | chr8 57916061 57916086 + 89
10 | chr4 98488749 98488774 + 71
17 | chr6 89296757 89296782 - 71
19 | chr8 57916061 57916086 + 914
2 | chr5 135821802 135821827 - 867
3 | chr14 19418999 19419024 - 821
14 | chr2 152562484 152562509 - 807
... | ... ... ... ... ...
2 | chr5 135821802 135821827 - 11
13 | chr1 80668132 80668157 - 6
0 | chr8 28510032 28510057 - 6
7 | chr19 19571102 19571127 + 4
7 | chr19 19571102 19571127 + 120
16 | chr9 120803448 120803473 + 96
5 | chr21 40099618 40099643 + 64
18 | chr1 194245558 194245583 + 29
PyRanges with 20 rows, 5 columns, and 1 index columns.
Contains 15 chromosomes and 2 strands.

Expand Down
4 changes: 2 additions & 2 deletions docs/tutorial.rst
Original file line number Diff line number Diff line change
Expand Up @@ -297,7 +297,7 @@ The function :func:`get_sequence <pyranges.PyRanges.get_sequence>` returns one s
The ``Sequence`` column is a pandas Series containing strings. We see that the starting codon is ATG in most cases, as expected.
When we check the length of the sequences, we notice that some are not 3 letters long:

>>> (first.Sequence.str.len() == 3 ).all()
>>> bool( (first.Sequence.str.len() == 3 ).all() )
False

Let's look at those sequences, using a row selector as before:
Expand Down Expand Up @@ -345,7 +345,7 @@ i.e. joining exons together. The sequence is given 5' to 3'.
``seq_first`` is not a PyRanges object, but a pandas DataFrame. It has a column for the group (ID) and one for Sequence.
Here we confirm the sequence length is always 3:

>>> (seq_first.Sequence.str.len()==3).all()
>>> bool( (seq_first.Sequence.str.len()==3).all() )
True


Expand Down
6 changes: 3 additions & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "pyranges1"
version = "1.0.2"
version = "1.0.3"
description = "GenomicRanges for Python."
requires-python = ">=3.12.0"
readme = "README.md"
Expand All @@ -27,7 +27,7 @@ keywords = ["bioinformatics", "genomicranges", "genomics"]
dependencies = ["pandas", "ncls>=0.0.63", "tabulate", "sorted_nearest>=0.0.33", "natsort"]

[project.optional-dependencies]
add-ons = ["pyrle >= 0.0.39", "bamread", "fisher", "pyfaidx", "pyBigWig", "joblib"]
add-ons = ["pyrle >= 0.0.39", "bamread", "pyfaidx", "pyBigWig", "joblib"] #"fisher",
dev = ["tox", "ruff == 0.3.0", "pyright", "pandas-stubs", "types-tabulate", "pytest-watcher", "pytest-xdist", "hypothesis>=6.92.1"]
docs = ["sphinx", "sphinx_rtd_theme", "sphinx-autoapi", "sphinxcontrib-napoleon"]
all = ["pyranges1[add-ons]", "pyranges1[dev]", "pyranges1[docs]"]
Expand All @@ -50,7 +50,6 @@ envlist = py312
deps =
pyrle >= 0.0.39
bamread
fisher
pyBigWig
pyfaidx
tox
Expand All @@ -61,6 +60,7 @@ deps =
pyright
joblib
hypothesis==6.92.1
#fisher
commands =
python tests/run_doctest_tutorial_howto.py
pytest --doctest-modules pyranges
Expand Down
10 changes: 5 additions & 5 deletions pyranges/ext/stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,9 +93,9 @@ def _find_chromosome_max_end_positions(grs: list["PyRanges"]) -> pd.DataFrame:
Examples
--------
>>> f1, f2 = pr.example_data.f1, pr.example_data.f2 # both only have chr1
>>> f1["End"].max()
>>> int(f1["End"].max())
9
>>> f2["End"].max()
>>> int(f2["End"].max())
7
"""
Expand Down Expand Up @@ -220,7 +220,7 @@ def fisher_exact(tp: Series, fp: Series, fn: Series, tn: Series, pseudocount: in
0 12 5 29 2
1 0 12 10 2
>>> pr.stats.fisher_exact(df.TP, df.FP, df.TN, df.FN)
>>> pr.stats.fisher_exact(df.TP, df.FP, df.TN, df.FN) # doctest: +SKIP
odds_ratio P PLeft PRight
0 0.165517 0.080269 0.044555 0.994525
1 0.000000 0.000067 0.000034 1.000000
Expand Down Expand Up @@ -726,7 +726,7 @@ def forbes(
Examples
--------
>>> gr, gr2 = pr.example_data.f1, pr.example_data.f2
>>> pr.stats.forbes(gr, gr2, chromsizes={"chr1": 10})
>>> float(pr.stats.forbes(gr, gr2, chromsizes={"chr1": 10}))
0.8333333333333334
"""
Expand Down Expand Up @@ -778,7 +778,7 @@ def jaccard(
--------
>>> gr, gr2 = pr.example_data.f1, pr.example_data.f2
>>> chromsizes = pr.example_data.chromsizes
>>> pr.stats.jaccard(gr, gr2)
>>> float(pr.stats.jaccard(gr, gr2))
0.14285714285714285
"""
Expand Down

0 comments on commit ca4e497

Please sign in to comment.