Skip to content

Commit

Permalink
Merge pull request #1193 from rpsimeon34/docs-docstrings
Browse files Browse the repository at this point in the history
docs: Add some missing docstrings and include dataset discovery tools in docs
  • Loading branch information
lgray authored Oct 16, 2024
2 parents 137b349 + e09779a commit 84f33c0
Show file tree
Hide file tree
Showing 31 changed files with 814 additions and 143 deletions.
13 changes: 13 additions & 0 deletions docs/source/dataset_tools.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
.. _dataset-tools:

Dataset Tools
*************

This page contains documentation for parts of the ``coffea.dataset_tools``
package that are not included in the ``coffea`` namespace. That is, they
must be explicitly imported.

.. automodule:: coffea.dataset_tools.dataset_query
:members:
.. automodule:: coffea.dataset_tools.rucio_utils
:members:
12 changes: 12 additions & 0 deletions docs/source/reference.rst
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,11 @@ When executing
a subset of the full coffea package is imported into the python environment.
Some packages must be imported explicitly, so as to avoid importing unnecessary
and/or heavy dependencies. Below lists the packages available in the ``coffea`` namespace.
Under that, we list documentation for some of the coffea packages that need to be
imported explicitly.

In coffea Namespace
-----------------------

.. autosummary::
:toctree: modules
Expand All @@ -28,3 +33,10 @@ and/or heavy dependencies. Below lists the packages available in the ``coffea``
coffea.nanoevents.methods.vector
coffea.processor
coffea.util

Not in coffea Namespace
---------------------------
Here is documentation for some of the packages that are not automatically
imported on a call to ``import coffea``.

* :ref:`dataset-tools`.
49 changes: 48 additions & 1 deletion src/coffea/analysis_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,24 @@


class WeightStatistics:
"""
Container for statistics about the weight, including the sum of squared weights
and number of entries.
Parameters
----------
sumw: float
The sum of weights
sumw2: float
The sum of squared weights
minw: float
The minimum weight
maxw: float
The maximum weight
n: int
The number of entries
"""

def __init__(self, sumw=0.0, sumw2=0.0, minw=numpy.inf, maxw=-numpy.inf, n=0):
self.sumw = sumw
self.sumw2 = sumw2
Expand All @@ -36,6 +54,17 @@ def identity(self):
return WeightStatistics()

def add(self, other):
"""Add two WeightStatistics objects together.
Adds the sum of weights, the sum of squared weights, and the number of entries.
Takes the minimum and maximum across the two WeightStatistics objects. Modifies
this object in place.
Parameters
----------
other: WeightStatistics
The other WeightStatistics object to add to this one
"""
self.sumw += other.sumw
self.sumw2 += other.sumw2
self.minw = min(self.minw, other.minw)
Expand Down Expand Up @@ -76,6 +105,8 @@ def __init__(self, size, storeIndividual=False):

@property
def weightStatistics(self):
"""Statistics about the weight, including the sum of squared weights
and number of entries."""
return self._weightStats

def __add_eager(self, name, weight, weightUp, weightDown, shift):
Expand Down Expand Up @@ -348,7 +379,7 @@ def __add_variation(
self.__add_variation_delayed(name, weight, weightUp, weightDown, shift)

def weight(self, modifier=None):
"""Current event weight vector
"""Returns the current event weight vector
Parameters
----------
Expand Down Expand Up @@ -1100,6 +1131,14 @@ def names(self):

@property
def delayed_mode(self):
"""
Is the PackedSelection in delayed mode?
Returns
-------
res: bool
True if the PackedSelection is in delayed mode, False otherwise
"""
if isinstance(self._data, dask_awkward.Array):
return True
elif isinstance(self._data, numpy.ndarray):
Expand All @@ -1112,6 +1151,14 @@ def delayed_mode(self):

@property
def maxitems(self):
"""
What is the maximum supported number of selections in this PackedSelection?
Returns
-------
res: int
The maximum supported number of selections
"""
return PackedSelection._supported_types[self._dtype]

def __add_delayed(self, name, selection, fill_value):
Expand Down
17 changes: 17 additions & 0 deletions src/coffea/btag_tools/btagscalefactor.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,23 @@ class BTagScaleFactor:
Defaults to 'comb,comb,incl'
keep_df : bool, optional
If set true, keep the parsed dataframe as an attribute (.df) for later inspection
Attributes
----------
LOOSE: int
Value is 0. This is the integer for the loose WP
MEDIUM: int
Value is 1. This is the integer for the medium WP
TIGHT: int
Value is 2. This is the integer for the tight WP
RESHAPE: int
Value is 3. This is the integer for the reshape WP
FLAV_B: int
Value is 0. This is the integer to represent the b flavor. Input choice to some methods.
FLAV_C: int
Value is 1. This is the integer to represent the c flavor. Input choice to some methods.
FLAV_UDSG: int
Value is 2. This is the integer to represent u, d, and s flavors, as well as gluons. Input choice to some methods.
"""

LOOSE, MEDIUM, TIGHT, RESHAPE = range(4)
Expand Down
2 changes: 2 additions & 0 deletions src/coffea/dataset_tools/apply_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ def apply_to_dataset(
) -> DaskOutputType | tuple[DaskOutputType, dask_awkward.Array]:
"""
Apply the supplied function or processor to the supplied dataset.
Parameters
----------
data_manipulation : ProcessorABC or GenericHEPAnalysis
Expand Down Expand Up @@ -97,6 +98,7 @@ def apply_to_fileset(
) -> dict[str, DaskOutputType] | tuple[dict[str, DaskOutputType], dask_awkward.Array]:
"""
Apply the supplied function or processor to the supplied fileset (set of datasets).
Parameters
----------
data_manipulation : ProcessorABC or GenericHEPAnalysis
Expand Down
Loading

0 comments on commit 84f33c0

Please sign in to comment.