-
Notifications
You must be signed in to change notification settings - Fork 1
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add sources and example notebook for 1D CES inference #161
base: main
Are you sure you want to change the base?
Changes from 49 commits
a5e2a2b
21c2226
02a2b29
21cf1eb
71fdc56
6613818
06327e4
cc1931a
85bc7b5
bd4536e
c6a634b
40189f3
78dc09e
786d664
d7c4917
6058072
c9871b8
1c14ad2
55d6b07
25ac3b5
3fd09df
392ef1e
4cc19c1
1cd0178
3d671e0
321fe1f
bfd25a0
417b455
ebf2ce2
4690f23
e254802
3630470
1a38ba7
9baa0f1
d7a600a
fafa95e
d4be2b7
740462d
ad90ed7
e82e4a7
39ed5f3
f4caf86
966e3f6
3b77f56
fcb32f7
002fafa
070e25f
cf4b587
6ef384d
b371613
f506a22
2889bd9
515c200
1122ec4
469338b
322499b
12872e3
bda4f6c
38586ef
e449e29
01df110
6d6907d
5dcbd44
286b7ce
08ba7fa
fcdf7c8
d57a780
437786a
ce54e48
02aee11
4e28bc1
f9aabb8
684a52d
72db646
fbe7750
72cc0b9
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -29,3 +29,4 @@ debug.py | |
docs/source/reference/release_notes.rst | ||
.vscode | ||
.hypothesis | ||
.venv |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,234 @@ | ||
from typing import Dict, Literal | ||
import numpy as np | ||
from scipy.interpolate import interp1d | ||
|
||
from inference_interface import template_to_multihist | ||
from blueice import HistogramPdfSource, Source | ||
from blueice.exceptions import PDFNotComputedException | ||
|
||
from multihist import Hist1d | ||
from alea.ces_transformation import Transformation | ||
|
||
MINIMAL_ENERGY_RESOLUTION = 0.05 | ||
|
||
|
||
class CESTemplateSource(HistogramPdfSource): | ||
def __init__(self, config: Dict, *args, **kwargs): | ||
"""Initialize the TemplateSource.""" | ||
# override the default interpolation method | ||
if "pdf_interpolation_method" not in config: | ||
config["pdf_interpolation_method"] = "piecewise" | ||
super().__init__(config, *args, **kwargs) | ||
|
||
def _load_inputs(self): | ||
self.ces_space = self.config["analysis_space"][0][1] | ||
self.max_e = np.max(self.ces_space) | ||
self.min_e = np.min(self.ces_space) | ||
self.templatename = self.config["templatename"] | ||
self.histname = self.config["histname"] | ||
|
||
def _load_true_histogram(self): | ||
h = template_to_multihist(self.templatename, self.histname, hist_to_read=Hist1d) | ||
return h | ||
|
||
def _check_histogram(self, h: Hist1d): | ||
"""Check if the histogram has expected binning.""" | ||
# We only take 1d histogram in the ces axes | ||
if not isinstance(h, Hist1d): | ||
raise ValueError("Only Hist1d object is supported") | ||
if self.ces_space.ndim != 1: | ||
raise ValueError("Only 1d analysis space is supported") | ||
if np.min(h.histogram) < 0: | ||
raise AssertionError( | ||
f"There are bins for source {self.templatename} with negative entries." | ||
) | ||
|
||
# check if the histogram contains the analysis space. | ||
histogram_max = np.max(h.bin_edges) | ||
histogram_min = np.min(h.bin_edges) | ||
if self.min_e < histogram_min or self.max_e > histogram_max: | ||
raise ValueError( | ||
f"The histogram edge ({histogram_min},{histogram_max}) \ | ||
does not contain the analysis space ({self.min_e},{self.max_e})" | ||
) | ||
|
||
def _create_transformation( | ||
self, transformation_type: Literal["efficiency", "smearing", "bias"] | ||
): | ||
if self.config.get(f"apply_{transformation_type}", True): | ||
parameters_key = f"{transformation_type}_parameters" | ||
model_key = f"{transformation_type}_model" | ||
|
||
if model_key not in self.config: | ||
raise ValueError(f"{transformation_type.capitalize()} model is not provided") | ||
|
||
if parameters_key not in self.config: | ||
raise ValueError(f"{transformation_type.capitalize()} parameters are not provided") | ||
else: | ||
parameter_list = self.config[parameters_key] | ||
# to get the values we need to iterate over the list and use self.config.get | ||
combined_parameter_dict = {k: self.config.get(k) for k in parameter_list} | ||
|
||
# Also take the peak_energy parameter if it is a mono smearing model | ||
if "mono" in self.config[model_key]: | ||
combined_parameter_dict["peak_energy"] = self.config["peak_energy"] | ||
|
||
return Transformation( | ||
parameters=combined_parameter_dict, | ||
action=transformation_type, | ||
model=self.config[model_key], | ||
) | ||
return None | ||
|
||
def _transform_histogram(self, h: Hist1d): | ||
# Create transformations for efficiency, smearing, and bias | ||
efficiency_transformation = self._create_transformation("efficiency") | ||
smearing_transformation = self._create_transformation("smearing") | ||
bias_transformation = self._create_transformation("bias") | ||
|
||
# Apply the transformations to the histogram | ||
if efficiency_transformation is not None: | ||
h = efficiency_transformation.apply_transformation(h) | ||
if smearing_transformation is not None: | ||
h = smearing_transformation.apply_transformation(h) | ||
if bias_transformation is not None: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think the bias makes more sense to apply in true energy (i.e. before smearing) what do you think? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Actually I think it's better to add in the smeared energy. Because the bias should in principle be caused by some area-dependent effect (like merging SEs and lone hits). So for example if a monoenergetic source produces two S2s, 1.1e4 and 1.2e4 separately, the biases would be slightly different for them even if the true energies are the same. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. In general the bias should be a function of true parameters (I agree if we had a measurement of "true quanta released" we could define our bias in terms of that, and the CES could be considered an approximation of that, but it is only an approximation) |
||
h = bias_transformation.apply_transformation(h) | ||
return h | ||
|
||
def _normalize_histogram(self, h: Hist1d): | ||
# To avoid confusion, we always normalize the histogram, regardless of the bin volume | ||
# So the unit is always events/year/keV, the rate multipliers are always in terms of that | ||
total_integration = np.sum(h.histogram * h.bin_volumes()) | ||
h.histogram /= total_integration | ||
|
||
# Apply the transformations to the histogram | ||
h = self._transform_histogram(h) | ||
|
||
# Calculate the integration of the histogram after all transformations | ||
# Only from min_e to max_e | ||
left_edges = h.bin_edges[:-1] | ||
right_edges = h.bin_edges[1:] | ||
outside_index = np.where((left_edges < self.min_e) | (right_edges > self.max_e)) | ||
h.histogram[outside_index] = 0 | ||
|
||
self._bin_volumes = h.bin_volumes() | ||
self._n_events_histogram = h.similar_blank_histogram() | ||
|
||
# Note that it already does what "fraction_in_roi" does in the old code | ||
# So no need to do again | ||
integration_after_transformation_in_roi = np.sum(h.histogram * h.bin_volumes()) | ||
|
||
self.events_per_year = ( | ||
integration_after_transformation_in_roi * self.config["rate_multiplier"] | ||
) | ||
self.events_per_day = self.events_per_year / 365 | ||
|
||
# For pdf, we need to normalize the histogram to 1 again | ||
return h, integration_after_transformation_in_roi | ||
|
||
def build_histogram(self): | ||
"""Build the histogram of the source. | ||
|
||
It's always called during the initialization of the source. So the attributes are set here. | ||
|
||
""" | ||
print("Building histogram") | ||
self._load_inputs() | ||
h = self._load_true_histogram() | ||
self._check_histogram(h) | ||
h, frac_in_roi = self._normalize_histogram(h) | ||
h_pdf = h | ||
h_pdf /= frac_in_roi | ||
self._pdf_histogram = h_pdf | ||
self.set_dtype() | ||
|
||
def simulate(self, n_events: int): | ||
dtype = [ | ||
("ces", float), | ||
("source", int), | ||
] | ||
ret = np.zeros(n_events, dtype=dtype) | ||
ret["ces"] = self._pdf_histogram.get_random(n_events) | ||
return ret | ||
|
||
def compute_pdf(self): | ||
self.build_histogram() | ||
Source.compute_pdf(self) | ||
|
||
def pdf(self, *args): | ||
# override the default interpolation method in blueice (RegularGridInterpolator) | ||
if not self.pdf_has_been_computed: | ||
raise PDFNotComputedException( | ||
"%s: Attempt to call a PDF that has not been computed" % self | ||
) | ||
|
||
method = self.config["pdf_interpolation_method"] | ||
|
||
if method == "linear": | ||
if not hasattr(self, "_pdf_interpolator"): | ||
# First call: | ||
# Construct a linear interpolator between the histogram bins | ||
self._pdf_interpolator = interp1d( | ||
self._pdf_histogram.bin_centers, | ||
self._pdf_histogram.histogram, | ||
) | ||
bcs = self._pdf_histogram.bin_centers | ||
clipped_data = np.clip(args, bcs.min(), bcs.max()) | ||
|
||
return self._pdf_interpolator(np.transpose(clipped_data)) | ||
|
||
elif method == "piecewise": | ||
return self._pdf_histogram.lookup(*args) | ||
|
||
else: | ||
raise NotImplementedError("PDF Interpolation method %s not implemented" % method) | ||
|
||
def set_dtype(self): | ||
self.dtype = [ | ||
("ces", float), | ||
("source", int), | ||
] | ||
|
||
|
||
class CESMonoenergySource(CESTemplateSource): | ||
def _load_inputs(self): | ||
self.ces_space = self.config["analysis_space"][0][1] | ||
self.max_e = np.max(self.ces_space) | ||
self.min_e = np.min(self.ces_space) | ||
self.mu = self.config["peak_energy"] | ||
|
||
def _load_true_histogram(self): | ||
number_of_bins = int((self.max_e - self.min_e) / MINIMAL_ENERGY_RESOLUTION) | ||
h = Hist1d( | ||
data=np.repeat(self.mu, 1), | ||
bins=number_of_bins, | ||
range=(self.min_e, self.max_e), | ||
) | ||
h.histogram = h.histogram.astype(np.float64) | ||
self.config["smearing_model"] = "mono_" + self.config["smearing_model"] | ||
return h | ||
|
||
|
||
class CESFlatSource(CESTemplateSource): | ||
def _load_inputs(self): | ||
self.ces_space = self.config["analysis_space"][0][1] | ||
self.max_e = np.max(self.ces_space) | ||
self.min_e = np.min(self.ces_space) | ||
|
||
def _load_true_histogram(self): | ||
number_of_bins = int((self.max_e - self.min_e) / MINIMAL_ENERGY_RESOLUTION) | ||
h = Hist1d( | ||
data=np.linspace(self.min_e, self.max_e, number_of_bins), | ||
bins=number_of_bins, | ||
range=(self.min_e, self.max_e), | ||
) | ||
h.histogram = h.histogram.astype(np.float64) | ||
return h | ||
|
||
def _transform_histogram(self, h: Hist1d): | ||
# Only efficiency is applicable for flat source | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Disagree-- you could have an efficiency defined in true energy and then a smearing on top, I think? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Similar to bias, efficiency is applied after the smearing. The smearing in our detector is mainly due to the Poisson fluctuation during the production of n_e and n_gamma which is before any detector effects start to get involved. This is the main reason that we set the smearing as the first transformation There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. IMO the efficiency should be the probability to detect an event given a certain initial recoil energy. This is what we use for WIMPs, and it is a well-defined probability (that does include the effects you list here as important inputs!). There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hi Knut, to clarify, the efficiency here is only projected in CES space. For S2 threshold and S1 efficiency, we get the CES projected values during the band fitting. For cut acceptance which is data-driven, the values are naturally only in CES. Defining everything in true energy is possible but the gain is limited. It will make every efficiency study entangled with the band fitting which is not necessary for lower or other studies performed in CES. |
||
efficiency_transformation = self._create_transformation("efficiency") | ||
|
||
if efficiency_transformation is not None: | ||
h = efficiency_transformation.apply_transformation(h) | ||
return h |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,155 @@ | ||
from pydantic import BaseModel, validator | ||
from typing import Dict, Optional, Any, Literal, Callable | ||
import numpy as np | ||
from scipy import stats | ||
from copy import deepcopy | ||
from multihist import Hist1d | ||
|
||
|
||
def energy_res(energy, a=25.8, b=1.429): | ||
"""Return energy resolution in keV. | ||
|
||
:param energy: true energy in keV :return: energy resolution in keV | ||
|
||
""" | ||
# Reference for the values of a,b: | ||
# xenon:xenonnt:analysis:ntsciencerun0:g1g2_update#standard_gaussian_vs_skew-gaussian_yue | ||
return (np.sqrt(energy) * a + energy * b) / 100 | ||
|
||
|
||
def smearing_mono_gaussian( | ||
hist: Any, | ||
smearing_a: float, | ||
smearing_b: float, | ||
peak_energy: float, | ||
bins: Optional[np.ndarray] = None, | ||
): | ||
|
||
if bins is None: | ||
# create an emptyzero histogram with the same binning as the input histogram | ||
data = stats.norm.pdf( | ||
hist.bin_centers, | ||
loc=peak_energy, | ||
scale=energy_res(peak_energy, smearing_a, smearing_b), | ||
) | ||
hist_smeared = Hist1d(data=np.zeros_like(data), bins=hist.bin_edges) | ||
hist_smeared.histogram = data | ||
else: | ||
# use the bins that set by the user | ||
bins = np.array(bins) | ||
if bins.size <= 1: | ||
raise ValueError("bins must have at least 2 elements") | ||
bin_centers = 0.5 * (bins[1:] + bins[:-1]) | ||
data = stats.norm.pdf( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. With coarse binning, I would propose instead defining the data as the difference of the CDF evaluated at the upper edge of the bin and evaluated at the lower edge of the bin |
||
bin_centers, | ||
loc=peak_energy, | ||
scale=energy_res(peak_energy, smearing_a, smearing_b), | ||
) | ||
# create an empty histogram with the user-defined binning | ||
hist_smeared = Hist1d(data=np.zeros_like(data), bins=bins) | ||
hist_smeared.histogram = data | ||
|
||
return hist_smeared | ||
|
||
|
||
def smearing_hist_gaussian( | ||
hist: Any, | ||
smearing_a: float, | ||
smearing_b: float, | ||
bins: Optional[np.ndarray] = None, | ||
): | ||
"""Smear a histogram. This allows for non-uniform histogram binning. | ||
|
||
:param hist: the spectrum we want to smear :param bins: bin edges of the returned spectrum | ||
:return: smeared histogram in the same unit as input spectrum | ||
|
||
""" | ||
assert isinstance(hist, Hist1d), "Only Hist1d object is supported" | ||
if bins is None: | ||
# set the bins to the bin edges of the input histogram | ||
bins = hist.bin_edges | ||
elif bins.size <= 1: | ||
raise ValueError("bins must have at least 2 elements") | ||
bins = np.array(bins) | ||
|
||
e_true_s, rates, bin_volumes = hist.bin_centers, hist.histogram, hist.bin_volumes() | ||
mask = np.where(e_true_s > 0) | ||
e_true_s = e_true_s[mask] | ||
rates = rates[mask] | ||
bin_volumes = bin_volumes[mask] | ||
|
||
e_smeared_s = 0.5 * (bins[1:] + bins[:-1]) | ||
smeared = np.zeros_like(e_smeared_s) | ||
|
||
for idx, e_smeared in enumerate(e_smeared_s): | ||
probs = ( | ||
stats.norm.pdf( | ||
e_smeared, | ||
loc=e_true_s, | ||
scale=energy_res(e_true_s, smearing_a, smearing_b), | ||
) | ||
* bin_volumes | ||
) | ||
smeared[idx] = np.sum(probs * rates) | ||
|
||
hist_smeared = Hist1d.from_histogram(smeared, bins) | ||
|
||
return hist_smeared | ||
|
||
|
||
def biasing_hist_arctan(hist: Any, A: float = 0.01977, k: float = 0.01707): | ||
"""Apply a constant bias to a histogram. | ||
|
||
:param hist: the spectrum we want to apply the bias to :param bias: the bias to apply to the | ||
spectrum :return: the spectrum with the bias applied | ||
|
||
""" | ||
assert isinstance(hist, Hist1d), "Only Hist1d object is supported" | ||
true_energy = hist.bin_centers | ||
h_bias = deepcopy(hist) | ||
bias_derivative = A * k / (1 + k**2 * true_energy**2) | ||
h_bias.histogram *= 1 / (1 + bias_derivative) | ||
return h_bias | ||
|
||
|
||
def efficiency_hist_constant(hist: Any, efficiency: float): | ||
"""Apply a constant efficiency to a histogram. | ||
|
||
:param hist: the spectrum we want to apply the efficiency to :param efficiency: the efficiency | ||
to apply to the spectrum :return: the spectrum with the efficiency applied | ||
|
||
""" | ||
assert isinstance(hist, Hist1d), "Only Hist1d object is supported" | ||
assert 0 <= efficiency <= 1, "Efficiency must be between 0 and 1" | ||
hist.histogram = hist.histogram * efficiency | ||
return hist | ||
|
||
|
||
MODELS: Dict[str, Dict[str, Callable]] = { | ||
"smearing": { | ||
"gaussian": smearing_hist_gaussian, | ||
"mono_gaussian": smearing_mono_gaussian, | ||
}, | ||
"bias": {"arctan": biasing_hist_arctan}, | ||
"efficiency": { | ||
"constant": efficiency_hist_constant, | ||
}, | ||
} | ||
|
||
|
||
# input: model name, parameters, transformation mode | ||
class Transformation(BaseModel): | ||
parameters: Dict[str, float] | ||
action: Literal["smearing", "bias", "efficiency"] | ||
model: str | ||
|
||
@validator("model") | ||
@classmethod | ||
def check_model(cls, v, values): | ||
if v not in MODELS[values["action"]]: | ||
raise ValueError(f"Model {v} not found for action {values['action']}") | ||
return v | ||
|
||
def apply_transformation(self, histogram: Hist1d): | ||
chosen_model = MODELS[self.action][self.model] | ||
return chosen_model(histogram, **self.parameters) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I do not think this boundary is required-- for fits, it should be set in the config fit boundary, and the bins can be as fine as we'd like and still be valid. (and other 1D fits may have a different "natural" scale for the bins)