Skip to content

Commit

Permalink
Merge pull request #1193 from rpsimeon34/docs-docstrings
Browse files Browse the repository at this point in the history
docs: Add some missing docstrings and include dataset discovery tools in docs
  • Loading branch information
lgray authored Oct 16, 2024
2 parents 137b349 + e09779a commit 84f33c0
Show file tree
Hide file tree
Showing 31 changed files with 814 additions and 143 deletions.
13 changes: 13 additions & 0 deletions docs/source/dataset_tools.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
.. _dataset-tools:

Dataset Tools
*************

This page contains documentation for parts of the ``coffea.dataset_tools``
package that are not included in the ``coffea`` namespace. That is, they
must be explicitly imported.

.. automodule:: coffea.dataset_tools.dataset_query
:members:
.. automodule:: coffea.dataset_tools.rucio_utils
:members:
12 changes: 12 additions & 0 deletions docs/source/reference.rst
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,11 @@ When executing
a subset of the full coffea package is imported into the python environment.
Some packages must be imported explicitly, so as to avoid importing unnecessary
and/or heavy dependencies. Below lists the packages available in the ``coffea`` namespace.
Under that, we list documentation for some of the coffea packages that need to be
imported explicitly.

In coffea Namespace
-----------------------

.. autosummary::
:toctree: modules
Expand All @@ -28,3 +33,10 @@ and/or heavy dependencies. Below lists the packages available in the ``coffea``
coffea.nanoevents.methods.vector
coffea.processor
coffea.util

Not in coffea Namespace
---------------------------
Here is documentation for some of the packages that are not automatically
imported on a call to ``import coffea``.

* :ref:`dataset-tools`.
49 changes: 48 additions & 1 deletion src/coffea/analysis_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,24 @@


class WeightStatistics:
"""
Container for statistics about the weight, including the sum of squared weights
and number of entries.
Parameters
----------
sumw: float
The sum of weights
sumw2: float
The sum of squared weights
minw: float
The minimum weight
maxw: float
The maximum weight
n: int
The number of entries
"""

def __init__(self, sumw=0.0, sumw2=0.0, minw=numpy.inf, maxw=-numpy.inf, n=0):
self.sumw = sumw
self.sumw2 = sumw2
Expand All @@ -36,6 +54,17 @@ def identity(self):
return WeightStatistics()

def add(self, other):
"""Add two WeightStatistics objects together.
Adds the sum of weights, the sum of squared weights, and the number of entries.
Takes the minimum and maximum across the two WeightStatistics objects. Modifies
this object in place.
Parameters
----------
other: WeightStatistics
The other WeightStatistics object to add to this one
"""
self.sumw += other.sumw
self.sumw2 += other.sumw2
self.minw = min(self.minw, other.minw)
Expand Down Expand Up @@ -76,6 +105,8 @@ def __init__(self, size, storeIndividual=False):

@property
def weightStatistics(self):
"""Statistics about the weight, including the sum of squared weights
and number of entries."""
return self._weightStats

def __add_eager(self, name, weight, weightUp, weightDown, shift):
Expand Down Expand Up @@ -348,7 +379,7 @@ def __add_variation(
self.__add_variation_delayed(name, weight, weightUp, weightDown, shift)

def weight(self, modifier=None):
"""Current event weight vector
"""Returns the current event weight vector
Parameters
----------
Expand Down Expand Up @@ -1100,6 +1131,14 @@ def names(self):

@property
def delayed_mode(self):
"""
Is the PackedSelection in delayed mode?
Returns
-------
res: bool
True if the PackedSelection is in delayed mode, False otherwise
"""
if isinstance(self._data, dask_awkward.Array):
return True
elif isinstance(self._data, numpy.ndarray):
Expand All @@ -1112,6 +1151,14 @@ def delayed_mode(self):

@property
def maxitems(self):
"""
What is the maximum supported number of selections in this PackedSelection?
Returns
-------
res: int
The maximum supported number of selections
"""
return PackedSelection._supported_types[self._dtype]

def __add_delayed(self, name, selection, fill_value):
Expand Down
17 changes: 17 additions & 0 deletions src/coffea/btag_tools/btagscalefactor.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,23 @@ class BTagScaleFactor:
Defaults to 'comb,comb,incl'
keep_df : bool, optional
If set true, keep the parsed dataframe as an attribute (.df) for later inspection
Attributes
----------
LOOSE: int
Value is 0. This is the integer for the loose WP
MEDIUM: int
Value is 1. This is the integer for the medium WP
TIGHT: int
Value is 2. This is the integer for the tight WP
RESHAPE: int
Value is 3. This is the integer for the reshape WP
FLAV_B: int
Value is 0. This is the integer to represent the b flavor. Input choice to some methods.
FLAV_C: int
Value is 1. This is the integer to represent the c flavor. Input choice to some methods.
FLAV_UDSG: int
Value is 2. This is the integer to represent u, d, and s flavors, as well as gluons. Input choice to some methods.
"""

LOOSE, MEDIUM, TIGHT, RESHAPE = range(4)
Expand Down
2 changes: 2 additions & 0 deletions src/coffea/dataset_tools/apply_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ def apply_to_dataset(
) -> DaskOutputType | tuple[DaskOutputType, dask_awkward.Array]:
"""
Apply the supplied function or processor to the supplied dataset.
Parameters
----------
data_manipulation : ProcessorABC or GenericHEPAnalysis
Expand Down Expand Up @@ -97,6 +98,7 @@ def apply_to_fileset(
) -> dict[str, DaskOutputType] | tuple[dict[str, DaskOutputType], dask_awkward.Array]:
"""
Apply the supplied function or processor to the supplied fileset (set of datasets).
Parameters
----------
data_manipulation : ProcessorABC or GenericHEPAnalysis
Expand Down
Loading

0 comments on commit 84f33c0

Please sign in to comment.