Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Constraints with arbitrary callables #11

Open
wants to merge 20 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
430fca0
First untested attempt at numba-fied, complex constraints
sognetic Sep 6, 2022
a73b54d
Merge branch 'refactor/model_builder' into feature/complex_constraints
sognetic Sep 13, 2022
1a91467
Working complex constraints, added example to demonstrate this feature.
sognetic Sep 14, 2022
b0bb6b6
Merge branch 'refactor/model_builder' into feature/complex_constraints
sognetic Oct 7, 2022
b97df5c
Merge branch 'refactor/model_builder' into feature/complex_constraints
sognetic Oct 12, 2022
c992da2
Improve error message
sognetic Nov 15, 2022
faa8fa2
Merge branch 'master' into feature/complex_constraints
sognetic Nov 17, 2022
cb8e2f7
Merge branch 'bugfix/performance_enhancements' into feature/complex_c…
sognetic Nov 18, 2022
253f2d3
Merge branch 'bugfix/performance_enhancements' into feature/complex_c…
sognetic Nov 20, 2022
69ab430
Merge branch 'bugfix/performance_enhancements' into feature/complex_c…
sognetic Nov 21, 2022
e5faaf3
Make sure there are no NaNs in the dataset
sognetic Nov 30, 2022
18fcc48
Test if constraint value matches initial parameters. Allow feeding ou…
sognetic Dec 1, 2022
f05c332
Merge branch 'bugfix/performance_enhancements' into feature/complex_c…
sognetic Dec 1, 2022
18eb541
Merge branch 'bugfix/performance_enhancements' into feature/complex_c…
sognetic Dec 9, 2022
a958f93
Merge branch 'master' of github.com:FelixMetzner/TemplateFitter
sognetic Dec 13, 2022
9e0166b
Merge branch 'master' into feature/complex_constraints
sognetic Dec 13, 2022
d24a1d9
Add the possibility to turn of numba functionality after finalizing t…
sognetic Jan 17, 2023
6b939d2
Cleanup the ParameterTranslator model to remove any attributes added …
sognetic Jan 18, 2023
0e360de
Fix comment and type hint
sognetic Feb 7, 2023
9bcfce6
Lambda functions used in the constraint can only be pickled using dil…
sognetic Feb 9, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
184 changes: 184 additions & 0 deletions examples/example_with_complex_constraint.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,184 @@
#!/usr/bin/env python
# coding: utf-8

""""
This is a basic example of a template fit with two templates in a single fit dimension and channel.
"""

import numpy as np
import pandas as pd
from templatefitter.fit_model.model_builder import FitModel
from templatefitter.fit_model.parameter_handler import ParameterHandler
from templatefitter.fit_model.template import Template
from templatefitter.fitter import TemplateFitter
from templatefitter.plotter.fit_result_plots import FitResultPlotter
from templatefitter.plotter.histogram_variable import HistVariable
from templatefitter.plotter.plot_style import KITColors

from pathlib import Path


def gen_sig(size, voi, voi_limits):
df = pd.DataFrame(
{voi: np.random.normal((voi_limits[1] + voi_limits[0]) / 2, 1, int(size)), "weight": [1.0] * int(size)}
)
df.name = "signal"
return df


def gen_bkg(size, voi, voi_limits, index):
df = pd.DataFrame(
{
voi: voi_limits[0] + (voi_limits[1] - voi_limits[0]) * np.random.random_sample(int(size)),
"weight": [1.0] * int(size),
}
)
df.name = f"background_{index}"
return df


def gen_bkg_two(size, voi, voi_limits, index):
df = pd.DataFrame(
{voi: np.random.normal((voi_limits[1] + voi_limits[0]) / 2.6, 1, int(size)), "weight": [1.0] * int(size)}
)
df.name = f"background_{index}"
return df


def run_basic_example():

# Defining a variable and range for which we will generate data
voi = "variableOfInterest"
voi_physics_limits = (0, 20)

# Defining a HistVariable object for the fit to use. Note: The scope / fit range used here can be (and is) smaller
# then the one used for generating the dataset.
# This is done for illustration here but can sometimes be beneficial.
# The fitter will later correct the yields so they apply to the entire dataset.
fit_var = HistVariable(df_label=voi, n_bins=10, scope=(5, 10), var_name="Variable of Interest")

# Defining a decay channel and two components from which templates will be generated
channel_name = "XtoYDecay"
components = [
gen_sig(1000, voi, voi_physics_limits),
gen_bkg(3000, voi, voi_physics_limits, 1),
gen_bkg_two(1000, voi, voi_physics_limits, 2),
]
component_colors = {
"signal": KITColors.kit_red,
"background_1": KITColors.light_grey,
"background_2": KITColors.kit_green,
}

# How much simulated data is there in each component?
initial_yield_dict = {component.name: component.loc[:, "weight"].sum() for component in components}

# Which fraction is within our fit window?
# This value is later used to extrapolate the fit yield to the entire variable range.
initial_eff_dict = {
component.name: component.loc[component[fit_var.df_label].between(*fit_var.scope), "weight"].sum()
/ initial_yield_dict[component.name]
for component in components
}

# Defining a ParameterHandler which holds all model parameters and a FitModel object which describes the rest of the fit
param_handler = ParameterHandler()
model = FitModel(parameter_handler=param_handler)

# Giving the fit model some yield and efficiency parameters. We'll not fit the efficiency parameter in this example,
# to do this we would need additional channels
for component in components:
model.add_model_parameter(
name=f"yield_{component.name}",
parameter_type=ParameterHandler.yield_parameter_type,
floating=True,
initial_value=initial_yield_dict[component.name],
)

model.add_model_parameter(
name=f"eff_{channel_name}_{component.name}",
parameter_type=ParameterHandler.efficiency_parameter_type,
floating=False,
initial_value=initial_eff_dict[component.name],
)

# First creating some templates, then adding them to the template
templates = []
for component_number, component in enumerate(components):
template = Template(
name=f"{channel_name}_{component.name}",
latex_label=component.name,
process_name=component.name,
color=component_colors[component.name],
dimensions=1,
bins=fit_var.n_bins,
scope=fit_var.scope,
params=param_handler,
data_column_names=fit_var.df_label,
data=component,
weights="weight",
)

templates.append(template)

model.add_template(template=template, yield_parameter=f"yield_{component.name}")

# Now we have to tell the fit model how to relate templates and efficiency parameters (together as a channel)
model.add_channel(
efficiency_parameters=[f"eff_{channel_name}_{component.name}" for component in components],
name=channel_name,
components=templates,
)

# Adding a constraint to the sum of the two background components
model.add_constraint(
["yield_background_1", "yield_background_2"],
2000.0,
10.0,
lambda x: 0.3333 * x["yield_background_1"] + x["yield_background_2"],
)

# Instead of real data we'll add some Asimov data to the fitter. This is equivalent to the sum of all templates.
model.add_asimov_data_from_templates()

# Lastly we'll finalize the model. This finalized model could now be saved with Python's pickle serializer.
model.finalize_model()

# Now that we're done creating the model, we can see how data and MC agreed before the fit. Agreement should be perfect
# in a fit to Asimov data.
# To do this, we first have to set up a FitResultPlotter.

output_folder = Path("./plots")
output_folder.mkdir(exist_ok=True)
fit_result_plotter_m2 = FitResultPlotter(reference_dimension=0, variables_by_channel=(fit_var,), fit_model=model)

# This also returns the path of the plot files that are created
fit_result_plotter_m2.plot_fit_result(use_initial_values=True, output_dir_path=output_folder, output_name_tag="")

# Here we'll now set up a TemplateFitter object and perform the fit itself.
# We'll use nuisance parameter for each bin and each template in the model.
fitter = TemplateFitter(fit_model=model, minimizer_id="iminuit")
fit_result = fitter.do_fit(update_templates=True, get_hesse=True, verbose=True, fix_nui_params=False)

# Now let's plot the fit results
fit_result_plotter_m2.plot_fit_result(output_dir_path=output_folder, output_name_tag="")

# Lastly, we can have a look at the significance of the signal + background hypothesis vs. a background only hypothesis.
# Note: You might get a warning from numpy which you can safely ignore here.
significance_dict = {}
for yield_parameter in param_handler.get_parameter_names_for_type(ParameterHandler.yield_parameter_type):
significance_dict[yield_parameter] = fitter.get_significance(
yield_parameter=yield_parameter, fix_nui_params=False, verbose=True, catch_exception=True
)

return fit_result, significance_dict


if __name__ == "__main__":
fit_result, sign_dict = run_basic_example()

print(f"The result of the fit is: \n {str(fit_result.params)} \n")

print(
f"The significance of the signal + background hypothesis vs. only background is {sign_dict['yield_signal']:.1f} sigma."
)
Loading