Restructured inference tasks to use mixins for parameter definitions.

uhh-cms · jomatthi · Jun 6, 2024 · Jun 6, 2024 · Jun 11, 2024 · Jun 11, 2024
commit 5cc280e6add5002c699789738cb0ff588e8e04c9
diff --git a/topsf/tasks/inference_v2/combine_base.py b/topsf/tasks/inference_v2/combine_base.py
@@ -24,14 +24,7 @@ class CombineBaseTask(
     asimov_data = luigi.BoolParameter(
         default=True,
         significant=False,
-        description="Use Asimov data for the fit",
-    )
-
-    mode = luigi.ChoiceParameter(
-        choices=["exp", "obs"],
-        default="exp",
-        significant=True,
-        description="Mode of the combine tool",
+        description="Use Asimov data",
     )
 
     # upstream requirements

diff --git a/topsf/tasks/inference_v2/fit_mixin.py b/topsf/tasks/inference_v2/fit_mixin.py
diff --git a/topsf/tasks/inference_v2/gen_toys.py b/topsf/tasks/inference_v2/gen_toys.py
@@ -1,41 +1,22 @@
 # coding: utf-8
 
-import luigi
 import law
 import os
 
 from topsf.tasks.inference_v2.combine_base import CombineBaseTask
+from topsf.tasks.inference_v2.mixins import ToysMixin
 
 
 class GenToysV2(
     CombineBaseTask,
+    ToysMixin,
 ):
-    n_toys = luigi.IntParameter(
-        default=-1,
-        significant=False,
-        description="Number of toys to generate",
-    )
-
-    set_parameters = law.CSVParameter(
-        significant=False,
-        description="Set parameters for the toys",
-    )
-
-    freeze_gen_parameters = law.CSVParameter(
-        significant=False,
-        description="Freeze parameters for the generation",
-    )
-
-    save_toys = luigi.BoolParameter(
-        default=True,
-        significant=False,
-        description="Save the generated toys",
-    )
-
-    gen_name = luigi.Parameter(
-        significant=True,
-        description="Name of the generated toys",
-    )
+    def output(self):
+        output_dict = {
+            "toy_file": self.target(f"higgsCombine{self.gen_name}.GenerateOnly.mH120.123456.root"),
+            "gen_toys_log": self.target("gen_toys.log"),
+        }
+        return output_dict
 
     @property
     def gen_toys_name(self):
@@ -47,13 +28,6 @@ def store_parts(self) -> law.util.InsertableDict:
         parts.insert_after("fit_v2", "gen_toys", self.gen_toys_name)
         return parts
 
-    def output(self):
-        output_dict = {
-            "toy_file": self.target(f"higgsCombine{self.gen_name}.GenerateOnly.mH120.123456.root"),
-            "gen_toys_log": self.target("gen_toys.log"),
-        }
-        return output_dict
-
     @law.decorator.log
     @law.decorator.safe_output
     def run(self):
@@ -66,7 +40,7 @@ def run(self):
         os.makedirs(output_dirname, exist_ok=True)
 
         # turn inputs into strings understandable by combine
-        new_set_parameters = ",".join(self.set_parameters)
+        new_set_parameters = ",".join(self.set_gen_parameters)
         new_freeze_gen_parameters = ",".join(self.freeze_gen_parameters)
 
         command_to_run = f"combine -M {self.combine_method}"

diff --git a/topsf/tasks/inference_v2/impacts.py b/topsf/tasks/inference_v2/impacts.py
@@ -1,6 +1,5 @@
 # coding: utf-8
 
-import luigi
 import law
 import os
 
@@ -11,64 +10,14 @@
 from topsf.tasks.inference_v2.inference_base import InferenceBaseTask
 from topsf.tasks.inference_v2.workspace import CreateWorkspaceV2
 from topsf.tasks.inference_v2.gen_toys import GenToysV2
+from topsf.tasks.inference_v2.mixins import ModeMixin, ImpactsMixin, ToysMixin
 
 
-class ImpactsV2(
+class ImpactsBaseV2(
     InferenceBaseTask,
+    ModeMixin,
+    ImpactsMixin,
 ):
-    mass = luigi.IntParameter(
-        significant=True,
-        description="Mass point",
-    )
-
-    robust_fit = luigi.IntParameter(
-        significant=False,
-        description="Run a robust fit",
-    )
-
-    combine_parallel = luigi.IntParameter(
-        significant=False,
-        description="Run the fits in parallel",
-    )
-
-    asimov_data = luigi.BoolParameter(
-        default=True,
-        significant=False,
-        description="Use Asimov data for the fit",
-    )
-
-    mode = luigi.ChoiceParameter(
-        choices=["exp", "obs"],
-        default="exp",
-        significant=True,
-        description="Mode of the combine tool",
-    )
-
-    # upstream requirements
-    reqs = Requirements(
-        RemoteWorkflow.reqs,
-        CreateDatacards=CreateDatacards,
-        CreateWorkspace=CreateWorkspaceV2,
-        GenToys=GenToysV2,
-    )
-
-    def workflow_requires(self):
-        reqs = super().workflow_requires()
-
-        reqs["workspace"] = self.requires_from_branch()
-        if self.mode == "exp":
-            reqs["toy_file"] = self.requires_from_branch()
-
-        return reqs
-
-    def requires(self):
-        reqs = {
-            "workspace": self.reqs.CreateWorkspace.req(self),
-        }
-        if self.mode == "exp":
-            reqs["gen_toys"] = self.reqs.GenToys.req(self)
-        return reqs
-
     def output(self):
         output_dict = {}
         output_dict[f"impacts_{self.mode}"] = self.target(f"impacts_{self.mode}.json")
@@ -129,3 +78,57 @@ def run(self):
         # store all outputs in log file
         output = out_initial + out_impacts + out_collect
         self.output()[f"impacts_{self.mode}_log"].dump(output, formatter="text")
+
+
+class ImpactsExpV2(
+    ImpactsBaseV2,
+    ToysMixin,
+):
+
+    # upstream requirements
+    reqs = Requirements(
+        RemoteWorkflow.reqs,
+        CreateDatacards=CreateDatacards,
+        CreateWorkspace=CreateWorkspaceV2,
+        GenToys=GenToysV2,
+    )
+
+    def workflow_requires(self):
+        reqs = super().workflow_requires()
+
+        reqs["workspace"] = self.requires_from_branch()
+        reqs["toy_file"] = self.requires_from_branch()
+
+        return reqs
+
+    def requires(self):
+        reqs = {
+            "workspace": self.reqs.CreateWorkspace.req(self),
+            "gen_toys": self.reqs.GenToys.req(self),
+        }
+        return reqs
+
+
+class ImpactsObsV2(
+    ImpactsBaseV2,
+):
+
+    # upstream requirements
+    reqs = Requirements(
+        RemoteWorkflow.reqs,
+        CreateDatacards=CreateDatacards,
+        CreateWorkspace=CreateWorkspaceV2,
+    )
+
+    def workflow_requires(self):
+        reqs = super().workflow_requires()
+
+        reqs["workspace"] = self.requires_from_branch()
+
+        return reqs
+
+    def requires(self):
+        reqs = {
+            "workspace": self.reqs.CreateWorkspace.req(self),
+        }
+        return reqs
diff --git a/topsf/tasks/inference_v2/inference_base.py b/topsf/tasks/inference_v2/inference_base.py
@@ -8,13 +8,24 @@
 from columnflow.tasks.framework.remote import RemoteWorkflow
 from topsf.tasks.inference import CreateDatacards
 from columnflow.util import dev_sandbox
+from columnflow.tasks.framework.mixins import (
+    CalibratorsMixin, SelectorStepsMixin, ProducersMixin, MLModelsMixin, InferenceModelMixin,
+)
 
-from topsf.tasks.inference_v2.fit_mixin import FitMixin
+from topsf.tasks.inference_v2.mixins import FitMixin
+from topsf.tasks.base import TopSFTask
 
 
 class InferenceBaseTask(
     FitMixin,
-    CreateDatacards,
+    TopSFTask,
+    InferenceModelMixin,
+    MLModelsMixin,
+    ProducersMixin,
+    SelectorStepsMixin,
+    CalibratorsMixin,
+    law.LocalWorkflow,
+    RemoteWorkflow,
     CommandTask,
 ):
     sandbox = dev_sandbox(law.config.get("analysis", "combine_sandbox"))