diff --git a/benchmarks/bench_cli.py b/benchmarks/bench_cli.py index a4460759..54b3ddec 100644 --- a/benchmarks/bench_cli.py +++ b/benchmarks/bench_cli.py @@ -113,9 +113,8 @@ def benchmark_scaffold_cli(test_empty_proj): assert "Success: All the folders are correctly configured" in res.output -def benchmark_gen_sv_cli(test_files, tmp_path, test_pdf, lhapdf_path): +def benchmark_gen_sv_cli(test_files, tmp_path): runner = CliRunner() - pdf_name = "NNPDF40_nlo_as_01180" max_as = "2" nf = "5" name_grid = "ATLAS_TTB_8TEV_LJ_TRAP_norensv_fixed.pineappl.lz4" @@ -123,8 +122,6 @@ def benchmark_gen_sv_cli(test_files, tmp_path, test_pdf, lhapdf_path): new_grid_path = tmp_path / name_grid target_path = tmp_path shutil.copy(grid_path, new_grid_path) - with lhapdf_path(test_pdf): - pdf = lhapdf.mkPDF(pdf_name) res = runner.invoke( command, ["ren_sv_grid", str(new_grid_path), str(target_path), max_as, nf, "False"], @@ -134,21 +131,23 @@ def benchmark_gen_sv_cli(test_files, tmp_path, test_pdf, lhapdf_path): def benchmark_kfactor_cli(test_files, tmp_path): runner = CliRunner() - grid_folder = test_files / "data" / "grids" / "400" + conf_file = test_files / "pineko.toml" + theory_id = 400 + dataset = "ATLAS_TTB_FAKE" kfolder = test_files / "data" / "kfactors" - fake_yaml_path = test_files / "data" / "yamldb" / "ATLAS_TTB_FAKE.yaml" - max_as = "3" + order_to_update = "3" target_path = tmp_path res = runner.invoke( command, [ "kfactor", - str(grid_folder), + "-c", + str(conf_file), + str(theory_id), + str(dataset), str(kfolder), - str(fake_yaml_path), str(target_path), - max_as, - "False", + order_to_update, ], ) - assert "The number of bins match the lenght of the k-factor" in res.output + assert "The number of bins match the length of the kfactor" in res.output diff --git a/benchmarks/bench_kfactor.py b/benchmarks/bench_kfactor.py index f7602610..d562f018 100644 --- a/benchmarks/bench_kfactor.py +++ b/benchmarks/bench_kfactor.py @@ -2,19 +2,22 @@ import numpy as np import pineappl -from pineko import kfactor +from pineko import configs, kfactor def benchmark_kfactor_inclusion(test_files, tmp_path, test_pdf, lhapdf_path): - fake_yaml_path = test_files / "data" / "yamldb" / "ATLAS_TTB_FAKE.yaml" - max_as = 3 + + base_configs = configs.load(test_files / "pineko.toml") + configs.configs = configs.defaults(base_configs) + + pto_to_update = 3 # here we want to update NNLO pdf_name = "NNPDF40_nnlo_as_01180" - kfactor.compute_k_factor_grid( - test_files / "data" / "grids" / "400", + kfactor.apply_to_dataset( + 400, + "ATLAS_TTB_FAKE", test_files / "data" / "kfactors", - fake_yaml_path, - max_as, - target_folder=tmp_path, + pto_to_update, + tmp_path, ) pluskfactor_grid_path = tmp_path / "ATLAS_TTB_8TEV_LJ_TRAP.pineappl.lz4" with lhapdf_path(test_pdf): @@ -46,7 +49,7 @@ def benchmark_kfactor_inclusion(test_files, tmp_path, test_pdf, lhapdf_path): np.array([], dtype=bool), sv_list, ).reshape(bin_number, len(sv_list)) - centrals_kfactor, _ = kfactor.read_kfactor( + centrals_kfactor, _ = kfactor.read_from_file( test_files / "data" / "kfactors" / "CF_QCD_ATLAS_TTB_8TEV_LJ_TRAP.dat" ) rtol = 1.0e-15 diff --git a/docs/source/overview/running.rst b/docs/source/overview/running.rst index ef1ee4b7..7194999f 100644 --- a/docs/source/overview/running.rst +++ b/docs/source/overview/running.rst @@ -63,7 +63,7 @@ handy utility functions: - applying the :doc:`FONLL prescription` - applying the :doc:`scale variation prescription` -- burning the :doc:`K-factor` into grids +- burning the :doc:`Kfactor` into grids Checking the grids @@ -100,7 +100,7 @@ Using pineko with NNPDF """"""""""""""""""""""" It is possible to use ``pineko`` without providing an explicit mapping between data and grids -(i.e., without a ``yamldb`` database) or theory cards, by using the data declared in the NNPDF +(i.e., without a ``ymldb`` database) or theory cards, by using the data declared in the NNPDF repository (which includes a data-theory mapping). In order to do this you need to install ``pineko`` with the ``nnpdf`` extra, which installs diff --git a/docs/source/theory/kfactors.rst b/docs/source/theory/kfactors.rst index cb8222cb..02db4758 100644 --- a/docs/source/theory/kfactors.rst +++ b/docs/source/theory/kfactors.rst @@ -1,11 +1,24 @@ -K-Factors +Kfactors ========= Another useful tool that `pineko` includes is ``pineko kfactor`` which allows the embedding of a kfactor as a proper order in a grid. The usage is the following:: - pineko kfactor GRIDS_FOLDER KFACTOR_FOLDER YAMLDB_PATH TARGET_FOLDER MAX_AS ORDER_EXISTS + pineko kfactor THEORY_ID DATASET KFACTOR_FOLDER TARGET_FOLDER PTO_TO_UPDATE [--order_exists] -where ``GRIDS_FOLDER`` is the folder containing the grids to update, ``KFACTOR_FOLDER`` is the folder -containing the kfactor files and ``YAMLDB_PATH`` is the path to the yamldb file of the requested dataset. -The other inputs have already been described in the previous section. +where: + +- ``THEORY_ID`` is the theory ID of the source grid, +- ``DATASET`` is the dataset name, +- ``KFACTOR_FOLDER`` is the folder containing the kfactor files, +- ``TARGET_FOLDER`` is the folder where the new updated grids is going to be saved, +- ``PTO_TO_UPDATE`` is the :math:`\alpha_s` perturbative order to update or create, + with the convention that ``LO=1``, ``NLO=2`` and so on, irrespectively to the powers of :math:`\alpha_s`. + Note also that this differs from the conventions by the NNPDF collaboration, + but it is consistent with the pineappl convention. +- ``--order_exists`` is a flag used to apply the kfactor to the specified perturbative order, insead of the next. + +Note that only pure QCD kfactors are taken into account. +For example to add the NNLO in a grid containing at most NLO one has to select ``PTO_TO_UPDATE=2``; +nn the other hand to reweight the NNLO present in a grid with a kfactor, +one should do ``PTO_TO_UPDATE=2 --order_exists``. diff --git a/pyproject.toml b/pyproject.toml index 1db36a65..be7ca9b2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -11,6 +11,8 @@ authors = [ "Alessandro Candido ", "Andrea Barontini ", "Felix Hekhorn ", + "Giacomo Magni ", + "Roy Stegeman ", "Juan Cruz-Martinez ", ] classifiers = [ diff --git a/src/pineko/check.py b/src/pineko/check.py index 67f0add2..3aebfc39 100644 --- a/src/pineko/check.py +++ b/src/pineko/check.py @@ -88,7 +88,7 @@ def check_grid_and_eko_compatible(pineappl_grid, operators, xif, max_as, max_al) xif : float factorization scale variation max_as: int - max order of alpa_s + max order of alpha_s max_al: int max order of alpha diff --git a/src/pineko/cli/kfactor.py b/src/pineko/cli/kfactor.py index 2283a78b..2ebe712b 100644 --- a/src/pineko/cli/kfactor.py +++ b/src/pineko/cli/kfactor.py @@ -5,34 +5,35 @@ import click from .. import kfactor -from ._base import command +from ._base import command, config_option, load_config @command.command("kfactor") -@click.argument("grids_folder", type=click.Path(exists=True)) +@config_option +@click.argument("theoryID", type=int) +@click.argument("dataset", type=str) @click.argument("kfactor_folder", type=click.Path(exists=True)) -@click.argument("yamldb_path", type=click.Path(exists=True)) @click.argument("target_folder", type=click.Path(exists=True)) -@click.argument("max_as", type=int) -@click.argument("order_exists", type=bool) -def k_factor_inclusion( - grids_folder, +@click.argument("pto_to_update", type=int) +@click.option("--order_exists", is_flag=True, help="Overwrite an existing order.") +def kfactor_inclusion( + cfg, + theoryid, + dataset, kfactor_folder, - yamldb_path, target_folder, - max_as, + pto_to_update, order_exists, ): - """Construct new grid with k_factor included.""" - grids_folder = pathlib.Path(grids_folder) + """Construct new grid with kfactor included.""" + load_config(cfg) kfactor_folder = pathlib.Path(kfactor_folder) - yamldb_path = pathlib.Path(yamldb_path) target_folder = pathlib.Path(target_folder) - kfactor.compute_k_factor_grid( - grids_folder, + kfactor.apply_to_dataset( + theoryid, + dataset, kfactor_folder, - yamldb_path, - max_as, + pto_to_update, target_folder=target_folder, order_exists=order_exists, ) diff --git a/src/pineko/kfactor.py b/src/pineko/kfactor.py index 92f616df..23320de7 100644 --- a/src/pineko/kfactor.py +++ b/src/pineko/kfactor.py @@ -1,4 +1,4 @@ -"""Module to include QCD K-factors in grids.""" +"""Module to include QCD kfactors in grids.""" import io @@ -8,23 +8,14 @@ import yaml from pineappl import import_only_subgrid -from . import scale_variations +from . import configs, fonll, scale_variations, utils +from .scale_variations import orders_as_tuple DEFAULT_PDF_SET = "NNPDF40_nnlo_as_01180" -def factgrid(subgrid): - """Return the array of the factorization scales squared from a subgrid.""" - return np.array([mu2.fac for mu2 in subgrid.mu2_grid()]) - - -def rengrid(subgrid): - """Return the array of the renormalization scales squared from a subgrid.""" - return np.array([mu2.ren for mu2 in subgrid.mu2_grid()]) - - -def read_kfactor(kfactor_path): - """Read the k-factor and returns the central values and the pdfset used to compute it.""" +def read_from_file(kfactor_path): + """Read the kfactor and returns the central values and the pdfset used to compute it.""" with open(kfactor_path, encoding="utf-8") as f: stars = f.readline() if not stars.startswith("*"): @@ -42,7 +33,7 @@ def read_kfactor(kfactor_path): data = data.reshape(-1, 2) central_value = data[:, 0] pdf_set = description.split(sep="PDFset:")[-1].split(sep="\n")[0].strip() - # If there is no PDF set in the k-factor, a default PDF set will be used + # If there is no PDF set in the kfactor, a default PDF set will be used # If the PDF set written in the file is not an actual lhapdf PDF, it will # raise an error. if len(pdf_set) == 0: @@ -50,149 +41,117 @@ def read_kfactor(kfactor_path): return central_value, pdf_set -def construct_scales_array( - mu2_ren_grid, - m_value, - order, - new_order, - central_k_factor, - bin_index, - alphas, - order_exists, -): - """Construct the array that will rescale the subgrid array taking into account the different renormalization scales.""" - scales_array = [] - for mu2 in mu2_ren_grid: - scales_array.append( - compute_scale_factor( - m_value, - order, - new_order, - mu2, - central_k_factor, - bin_index, - alphas, - order_exists, - ) - ) - return scales_array - - def compute_scale_factor( - m, - nec_order, - to_construct_order, - Q2, - central_k_factor, + order, + order_to_update, + mu2, + central_kfactor, bin_index, alphas, - order_exists, ): - """Compute the factor to be multiplied to the given nec_order. + """Compute the factor to be multiplied to the given the required order. Parameters ---------- - m : int - first non zero perturbative order - nec_order : tuple(int) + order : tuple(int) tuple of the order that has to be rescaled to get the final order - to_contruct_order : tuple(int) - tuple of the scale varied order to be constructed - Q2: float + order_to_update : tuple(int) + order to update + mu2: float energy scale squared of the bin - central_k_factor: list(float) - list of the centrals k-factors + central_kfactor: list(float) + list of the centrals kfactors bin_index: int index of the bin alphas: lhapdf.AlphaS alpha_s object - - Returns - ------- - float - full contribution factor """ - max_as = to_construct_order[0] - m - alpha_val = alphas.alphasQ2(Q2) - alpha_term = 1.0 / pow(alpha_val, max_as - (nec_order[0] - m)) - k_term = central_k_factor[bin_index] - 1.0 - if order_exists and (max_as - (nec_order[0] - m)) == 0: - k_term = central_k_factor[bin_index] + alpha_val = alphas.alphasQ2(mu2) + max_as = order_to_update[0] + as_order = order[0] + alpha_term = 1.0 / pow(alpha_val, max_as - as_order) + k_term = central_kfactor[bin_index] - 1.0 return k_term * alpha_term -def scale_subgrid(extracted_subgrid, scales_array): - """Rescales the array contained in the subgrid using scales_array and returns a new subgrid constructed with the scaled array.""" - original_array = extracted_subgrid.to_array3() +def scale_subgrid(subgrid, scales_array): + """Rescales the array contained in the subgrid using scales_array.""" + original_array = subgrid.to_array3() if len(original_array) != len(scales_array): raise ValueError("The original and the scales arrays have different shapes.") + # construct subgrid scaled_array = [] for scale_value, arr_to_scale in zip(scales_array, original_array): scaled_array_nest = [] for arr in arr_to_scale: scaled_array_nest.append(list(arr * scale_value)) scaled_array.append(scaled_array_nest) - x1grid = extracted_subgrid.x1_grid() - x2grid = extracted_subgrid.x2_grid() if len(scales_array) == 0: scaled_array = np.zeros(shape=(0, 0, 0), dtype=float) else: scaled_array = np.array(scaled_array, dtype=float) - mu2_grid = [tuple([mu2.ren, mu2.fac]) for mu2 in extracted_subgrid.mu2_grid()] + # get coordinates + x1grid = subgrid.x1_grid() + x2grid = subgrid.x2_grid() + mu2_grid = [tuple([mu2.ren, mu2.fac]) for mu2 in subgrid.mu2_grid()] + # assemble scaled_subgrid = import_only_subgrid.ImportOnlySubgridV2( scaled_array, mu2_grid, x1grid, x2grid ) return scaled_subgrid -def compute_orders_map(m, max_as, min_al, order_exists): - """Compute a dictionary with all the necessary orders to compute the requested order. +def is_already_in_no_logs(to_check, list_orders): + """Check if the requested order is already in the grid.""" + for order in list_orders: + if ( + order[-2] == 0 + and order[-1] == 0 + and (order[0] == to_check[0]) + and (order[1] == to_check[1]) + ): + return True + return False + + +def construct_new_order(grid, order, order_to_update, central_kfactor, alphas): + """Construct a scaled grid, with the given order. Parameters ---------- - m : int - first non zero perturbative order of the grid - max_as : int - max alpha_s order - min_al : int - al order of leading order - Returns - ------- - dict(tuple(int)) - description of all the needed orders + grid : pineappl.grid + loaded grid + order : tuple + current alpha_s order + order_to_update: tuple + alpha_s order to update + central_kfactor : np.ndarray + kfactors to apply + alphas : lhapdf.AlphaS + alphas """ - add = 0 - if order_exists: - add = 1 - orders = {} - orders[(m + max_as, min_al, 0, 0)] = [ - (m + de, min_al, 0, 0) for de in range(max_as + add) - ] - return orders - - -def create_singlegridonly( - grid, m_value, order, new_order, central_k_factor, alphas, order_exists -): - """Create a grid containing only the contribution given by new_order.""" - new_grid = scale_variations.initialize_new_grid(grid, new_order) # extract the relevant order to rescale from the grid for each lumi and bin - grid_orders = [order.as_tuple() for order in grid.orders()] - order_index = grid_orders.index(order) + grid_orders = orders_as_tuple(grid) + + new_grid = scale_variations.initialize_new_grid(grid, order_to_update) + original_order_index = grid_orders.index(order) + for lumi_index in range(len(new_grid.channels())): for bin_index in range(grid.bins()): - extracted_subgrid = grid.subgrid(order_index, bin_index, lumi_index) - scales_array = construct_scales_array( - rengrid(extracted_subgrid), - m_value, - order, - new_order, - central_k_factor, - bin_index, - alphas, - order_exists, - ) - scaled_subgrid = scale_subgrid(extracted_subgrid, scales_array) + subgrid = grid.subgrid(original_order_index, bin_index, lumi_index) + mu2_ren_grid = [mu2.ren for mu2 in subgrid.mu2_grid()] + scales_array = [ + compute_scale_factor( + order, + order_to_update, + mu2, + central_kfactor, + bin_index, + alphas, + ) + for mu2 in mu2_ren_grid + ] + scaled_subgrid = scale_subgrid(subgrid, scales_array) # Set this subgrid inside the new grid new_grid.set_subgrid(0, bin_index, lumi_index, scaled_subgrid) # Fixing bin_limits and normalizations @@ -209,210 +168,180 @@ def create_singlegridonly( return new_grid -def create_grids( - gridpath, - max_as, - first_non_zero_as_order, - min_al, - centrals_k_factor, +def apply_to_grid( + central_kfactor, alphas, + grid, + pto_to_update, + target_grid_path, order_exists, ): - """Create all the necessary grids for a certain starting grid.""" - grid = pineappl.grid.Grid.read(gridpath) - m_value = first_non_zero_as_order - nec_orders = compute_orders_map(m_value, max_as, min_al, order_exists) - grid_list = {} - for to_construct_order in nec_orders: - list_grid_order = [] - for nec_order in nec_orders[to_construct_order]: - list_grid_order.append( - create_singlegridonly( - grid, - m_value, - nec_order, - to_construct_order, - centrals_k_factor, - alphas, - order_exists, - ) - ) - grid_list[to_construct_order] = list_grid_order - - return grid_list, nec_orders + """Apply the centrals_kfactor to the grid. + Parameters + ---------- + central_kfactor : np.ndarray + kfactors to apply + alphas : lhapdf.AlphaS + alphas + grid : pineappl.grid + loaded grid + pto_to_update : int + perturbative order to update: 1 = LO, 2 = NLO ... + no matter which power of alpha_s it is. + target_grid_path: pathlib.Path + path where store the new grid + order_exists: bool + True if the order to update is already present + """ + grid_orders = orders_as_tuple(grid) -def is_already_in(to_check, list_orders): - """Check if the requested order is already in the grid.""" - for order in list_orders: - if ( - order[-2] == 0 - and order[-1] == 0 - and (order[0] == to_check[0]) - and (order[1] == to_check[1]) - ): - return True - return False - + # remove not necessary orders + # NOTE: eventual QED kfactors are not supported + order_mask = pineappl.grid.Order.create_mask(grid.orders(), pto_to_update, 0, True) + grid_orders_filtered = list(np.array(grid_orders)[order_mask]) + grid_orders_filtered.sort(key=scale_variations.qcd) + min_as = grid_orders_filtered[0][0] + # TODO: this is always going to be 0, given the mask above ... + min_al = grid_orders_filtered[0][1] -def construct_and_merge_grids( - grid_path, - max_as, - first_nonzero_order, - min_al, - centrals_kfactor, - alphas, - target_folder, - order_exists, -): - """Create, write and merge all the grids.""" - # Creating all the necessary grids - grid_list, nec_orders = create_grids( - grid_path, - max_as, - first_nonzero_order[0], - min_al, - centrals_kfactor, - alphas, - order_exists, - ) - # Writing the sv grids - grids_paths = scale_variations.write_grids(gridpath=grid_path, grid_list=grid_list) - # Merging all together - scale_variations.merge_grids( - gridpath=grid_path, - grid_list_path=grids_paths, - target_path=target_folder, - nec_orders=nec_orders, - order_exists=order_exists, - ) + # the actual alpha_s order to update + order_to_update = pto_to_update + min_as - 1 + order_to_update = (order_to_update, min_al, 0, 0) + # check if the order is already there + is_in = is_already_in_no_logs(order_to_update, grid_orders_filtered) -def do_it( - centrals_kfactor, - alphas, - grid_path, - grid, - max_as, - max_as_test, - target_folder, - order_exists, -): - """Apply the centrals_kfactor to the grid if the order is not already there.""" - grid_orders = [orde.as_tuple() for orde in grid.orders()] - order_mask = pineappl.grid.Order.create_mask(grid.orders(), max_as, 0, True) - grid_orders_filtered = list(np.array(grid_orders)[order_mask]) - grid_orders_filtered.sort(key=scale_variations.qcd) - first_nonzero_order = grid_orders_filtered[0] - min_al = first_nonzero_order[1] - is_in = is_already_in( - (first_nonzero_order[0] + max_as_test, min_al, 0, 0), grid_orders_filtered - ) + # Prevent summing orders incoherently if is_in and not order_exists: rich.print("[green] Success: Requested order already in the grid.") return if not is_in and order_exists: rich.print("[red] Abort: order exists is True but order not in the grid.") return - construct_and_merge_grids( - grid_path, - max_as_test, - first_nonzero_order, - min_al, - centrals_kfactor, - alphas, - target_folder, - order_exists, - ) - -def filter_k_factors(pigrid, centrals_kfactor): - """Filter the centrals k-factors according to their lenght compared to the number of bins of the grid.""" - centrals_kfactor_filtered = np.array([]) - if pigrid.bins() == len(centrals_kfactor): - rich.print("[orange] The number of bins match the lenght of the k-factor.") - centrals_kfactor_filtered = centrals_kfactor - elif pigrid.bins() < len(centrals_kfactor): - rich.print( - "[yellow] The number of bins is less than the lenght of the k-factor." + # loop on all the order to update + max_as = grid_orders_filtered[-1][0] + orders_list = [(de, min_al, 0, 0) for de in range(min_as, max_as + 1)] + # create an empty grid and add the rescaled order + new_order_grid = None + for i, as_order in enumerate(orders_list): + order_grid = construct_new_order( + grid, as_order, order_to_update, central_kfactor, alphas ) - if not all(elem == centrals_kfactor[0] for elem in centrals_kfactor): - # This case is actually wrong. - raise ValueError("KFactor contains too many different values.") - centrals_kfactor_filtered = centrals_kfactor - else: - rich.print( - "[yellow] The number of bins is more than the lenght of the k-factor." + if i == 0: + new_order_grid = order_grid + else: + new_order_grid.merge(order_grid) + + new_grid = grid + # if the new order is there, clean the old one. + if is_in: + new_grid = scale_variations.construct_and_dump_order_exists_grid( + grid, order_to_update ) + # merge the updated order with the original one. + new_grid.merge(new_order_grid) + new_grid.write_lz4(target_grid_path) - # This is the last case in which grid.bins() > len(centrals_kfactor) - - # Note that sometimes there are more bins in the grid than in the cfactor file - - # this is not a problem because in those cases either all cfactor values are the - # same (thus there is no doubt about whether we have the correct one) or the - # non-exisiting cfactors would be multiplied by bins corresponding to all '0' in the - # grid. - # Let's check if we are in the first or second case - if len(np.unique(centrals_kfactor)) == 1: - # In this case I just need to add more elements to the kfactor - for _num in range(pigrid.bins()): - centrals_kfactor_filtered = np.append( - centrals_kfactor_filtered, centrals_kfactor[0] - ) - else: - # In this case this means that the missing entries will multiply zero subgrids so we can just add 0s - for _num in range(pigrid.bins()): - centrals_kfactor_filtered = np.append(centrals_kfactor_filtered, 0.0) - return centrals_kfactor_filtered +def to_list(grid, central_kfactors): + """Cast the centrals kfactors to the correct length. + + Apply a normalization according to the length compared to the number of bins of the grid. -def compute_k_factor_grid( - grids_folder, + Parameters + ---------- + grid: pineappl.grid.Grid + grid + centrals_kfactor: list + list of kfactor for each point + """ + if grid.bins() == len(central_kfactors): + rich.print("The number of bins match the length of the kfactor.") + return central_kfactors + if grid.bins() < len(central_kfactors): + rich.print( + "[yellow] The number of bins is less than the length of the kfactor." + ) + if not len(np.unique(central_kfactors)) == 1: + # This case is actually wrong. + raise ValueError("kfactor contains too many different values.") + return central_kfactors + + rich.print("[yellow] The number of bins is more than the length of the kfactor.") + + # This is the last case in which grid.bins() > len(centrals_kfactor) + # Note that sometimes there are more bins in the grid than in the kfactor file - + # this is not a problem because in those cases either all kfactor values are the + # same (thus there is no doubt about whether we have the correct one) or the + # non-existing kfactor would be multiplied by bins corresponding to all '0' in the + # grid. + # Let's check if we are in the first or second case + if len(np.unique(central_kfactors)) == 1: + # In this case I just need to add more elements to the kfactor + return np.full(grid.bins(), central_kfactors[0]) + # In this case this means that the missing entries will + # multiply zero subgrids so we can just add 0s + return np.full(grid.bins(), 0) + + +def apply_to_dataset( + theoryid, + dataset, kfactor_folder, - yamldb_path, - max_as, - target_folder=None, + pto_to_update, + target_folder, order_exists=False, ): - """Include the k-factor in the grid in order to have its associated order in the grid itself. + """Include the kfactor in the grid in order to have its associated order in the grid itself. Parameters ---------- - grids_folder : pathlib.Path() - pineappl grids folder + theoryid : int + theory ID of the source grid + dataset : str + datset name kfactor_folder : pathlib.Path() kfactors folder - yamldb_path : pathlib.Path() - path to the yaml file describing the dataset - max_as : int - max as order + pto_to_update : int + perturbative order to update: 1 = LO, 2 = NLO ... + no matter which power of alpha_s it is. target_folder: pathlib.Path - path where store the new grid (optional) + path where store the new grid + order_exists: bool + True if the order to update is already present """ import lhapdf # pylint: disable=import-error,import-outside-toplevel - # With respect to the usual convention here max_as is max_as-1 - max_as_test = max_as - 1 # Extracting info from yaml file - with open(yamldb_path, encoding="utf-8") as f: - yamldict = yaml.safe_load(f) - for grid_list in yamldict["operands"]: - for grid in grid_list: - cfac_path = kfactor_folder / f"CF_QCD_{grid}.dat" - if "ATLASDY2D8TEV" in grid: - cfac_path = kfactor_folder / f"CF_QCDEWK_{grid}.dat" - centrals_kfactor, pdf_set = read_kfactor(cfac_path) - alphas = lhapdf.mkAlphaS(pdf_set) - grid_path = grids_folder / (f"{grid}.pineappl.lz4") - pigrid = pineappl.grid.Grid.read(grid_path) - centrals_kfactor_filtered = filter_k_factors(pigrid, centrals_kfactor) - do_it( - centrals_kfactor_filtered, - alphas, - grid_path, - pigrid, - max_as, - max_as_test, - target_folder, - order_exists, - ) + grid_list = utils.read_grids_from_nnpdf(dataset, configs.configs) + if grid_list is None: + grid_list = fonll.grids_names( + configs.configs["paths"]["ymldb"] / f"{dataset}.yaml" + ) + + # loop on grids_name + for grid in grid_list: + # TODO: generalize for other type of kfactors ? + grid_name = grid.split(".")[0] + kfactor_path = kfactor_folder / f"CF_QCD_{grid_name}.dat" + if "ATLASDY2D8TEV" in grid: + kfactor_path = kfactor_folder / f"CF_QCDEWK_{grid_name}.dat" + + current_grid = pineappl.grid.Grid.read( + configs.configs["paths"]["grids"] / str(theoryid) / grid + ) + + central_kfactor, pdf_set = read_from_file(kfactor_path) + central_kfactor_filtered = to_list(current_grid, central_kfactor) + alphas = lhapdf.mkAlphaS(pdf_set) + + apply_to_grid( + central_kfactor_filtered, + alphas, + current_grid, + pto_to_update, + target_folder / grid, + order_exists, + ) diff --git a/src/pineko/scale_variations.py b/src/pineko/scale_variations.py index 04ff9d80..f06f38b6 100644 --- a/src/pineko/scale_variations.py +++ b/src/pineko/scale_variations.py @@ -31,6 +31,11 @@ def qcd(order: OrderTuple) -> int: return order[0] +def orders_as_tuple(grid: pineappl.grid.Grid) -> List[OrderTuple]: + """Return grid orders as a tuple.""" + return [order.as_tuple() for order in grid.orders()] + + def ren_sv_coeffs(m, max_as, logpart, which_part, nf): """Ren_sv coefficient for the requested part. @@ -114,7 +119,7 @@ def create_svonly(grid, order, new_order, scalefactor): """Create a grid containing only the renormalization scale variations at a given order for a grid.""" new_grid = initialize_new_grid(grid, new_order) # extract the relevant order to rescale from the grid for each lumi and bin - grid_orders = [order.as_tuple() for order in grid.orders()] + grid_orders = orders_as_tuple(grid) order_index = grid_orders.index(order) for lumi_index in range(len(new_grid.channels())): for bin_index in range(grid.bins()): @@ -137,7 +142,7 @@ def create_svonly(grid, order, new_order, scalefactor): def create_grids(gridpath, max_as, nf): """Create all the necessary scale variations grids for a certain starting grid.""" grid = pineappl.grid.Grid.read(gridpath) - grid_orders = [orde.as_tuple() for orde in grid.orders()] + grid_orders = orders_as_tuple(grid) order_mask = pineappl.grid.Order.create_mask(grid.orders(), max_as, 0, True) grid_orders_filtered = list(np.array(grid_orders)[order_mask]) grid_orders_filtered.sort(key=qcd) @@ -159,55 +164,24 @@ def create_grids(gridpath, max_as, nf): ) grid_list[to_construct_order] = list_grid_order - return grid_list, nec_orders - - -def write_grids(gridpath, grid_list): - """Write the single grids.""" - base_name = gridpath.stem.split(".pineappl")[0] - final_part = ".pineappl.lz4" - grid_paths = [] - for order in grid_list: - # For each scale variation order, if more than one grid contributes, merge them all together in a single one - if len(grid_list[order]) > 1: - for grid in grid_list[order][1:]: - tmp_path = gridpath.parent / ("tmp" + final_part) - grid.write_lz4(tmp_path) - grid_list[order][0].merge_from_file(tmp_path) - tmp_path.unlink() - new_grid_path = gridpath.parent / ( - base_name + "_" + str(order[2]) + final_part - ) # order[2] is the ren_sv order - grid_paths.append(new_grid_path) - grid_list[order][0].write_lz4(new_grid_path) - return grid_paths - - -def merge_grids( - gridpath, grid_list_path, target_path=None, nec_orders=None, order_exists=False -): - """Merge the single grids in the original.""" - grid = pineappl.grid.Grid.read(gridpath) - if target_path is None: - target_path = gridpath.parent / gridpath.name - else: - target_path = target_path / gridpath.name - if order_exists: - grid = construct_and_dump_order_exists_grid( - grid, list(nec_orders.keys())[0] if nec_orders is not None else [] - ) - for grid_path in grid_list_path: - grid.merge_from_file(grid_path) - grid_path.unlink() - grid.write_lz4(target_path) + return grid_list def construct_and_dump_order_exists_grid(ori_grid, to_construct_order): - """Remove the order that has to be substituted from the grid.""" + """Remove the order that has to be substituted from the grid. + + Parameters + ---------- + ori_grid: + original grid + to_construct_order: + order to delete + """ + # TODO: can we make this function simpler ?? bin_limits = [float(bin) for bin in range(ori_grid.bins() + 1)] lumi_grid = [pineappl.lumi.LumiEntry(mylum) for mylum in ori_grid.channels()] subgrid_params = pineappl.subgrid.SubgridParams() - ori_grid_orders = [order.as_tuple() for order in ori_grid.orders()] + ori_grid_orders = orders_as_tuple(ori_grid) new_orders = [ pineappl.grid.Order(*ord) for ord in ori_grid_orders @@ -216,13 +190,13 @@ def construct_and_dump_order_exists_grid(ori_grid, to_construct_order): new_grid = pineappl.grid.Grid.create( lumi_grid, new_orders, bin_limits, subgrid_params ) - orders_indeces = [ori_grid_orders.index(order.as_tuple()) for order in new_orders] - for order_index in orders_indeces: + orders_indices = [ori_grid_orders.index(order.as_tuple()) for order in new_orders] + for order_index in orders_indices: for lumi_index in range(len(lumi_grid)): for bin_index in range(ori_grid.bins()): extr_subgrid = ori_grid.subgrid(order_index, bin_index, lumi_index) new_grid.set_subgrid( - orders_indeces.index(order_index), + orders_indices.index(order_index), bin_index, lumi_index, extr_subgrid, @@ -240,8 +214,11 @@ def construct_and_dump_order_exists_grid(ori_grid, to_construct_order): norma = ori_grid.bin_normalizations() remap_obj = pineappl.bin.BinRemapper(norma, limits) new_grid.set_remapper(remap_obj) + + # propagate metadata for k, v in ori_grid.key_values().items(): new_grid.set_key_value(k, v) + return new_grid @@ -254,9 +231,8 @@ def compute_ren_sv_grid( ): """Generate renormalization scale variation terms for the given grid, according to the max_as.""" # First let's check if the ren_sv are already there - checkres, max_as_effective = check.contains_sv( - pineappl.grid.Grid.read(grid_path), max_as, 0, check.Scale.REN - ) + grid = pineappl.grid.Grid.read(grid_path) + checkres, max_as_effective = check.contains_sv(grid, max_as, 0, check.Scale.REN) # Usual different convention with max_as if max_as_effective == max_as and (checkres is not check.AvailableAtMax.CENTRAL): if not order_exists: @@ -265,18 +241,19 @@ def compute_ren_sv_grid( return ReturnState.ORDER_EXISTS_FAILURE if max_as_effective < max_as and checkres is check.AvailableAtMax.SCVAR: return ReturnState.MISSING_CENTRAL + # Create all the necessary grids # With respect to the usual convention here max_as is max_as-1 - max_as -= 1 - # Creating all the necessary grids - grid_list, nec_orders = create_grids(grid_path, max_as, nf) - # Writing the sv grids - sv_grids_paths = write_grids(gridpath=grid_path, grid_list=grid_list) - # Merging all together - merge_grids( - gridpath=grid_path, - grid_list_path=sv_grids_paths, - target_path=target_path, - nec_orders=nec_orders, - order_exists=order_exists, - ) + grid_list = create_grids(grid_path, max_as - 1, nf) + + # merge them + for sv_order, sv_grids in grid_list.items(): + # if the new order is there, clean the old one. + if sv_order in orders_as_tuple(grid): + grid = construct_and_dump_order_exists_grid(grid, list(grid_list)[0]) + for sv_grid in sv_grids: + grid.merge(sv_grid) + # save + if target_path is None: + target_path = grid_path.parent + grid.write_lz4(target_path / grid_path.name) return ReturnState.SUCCESS diff --git a/tests/test_kfactor.py b/tests/test_kfactor.py index d8c602ef..095b6802 100644 --- a/tests/test_kfactor.py +++ b/tests/test_kfactor.py @@ -27,51 +27,44 @@ def test_compute_scale_factor(): bin_index = 1 np.testing.assert_allclose( kfactor.compute_scale_factor( - 0, [0, 0, 0, 0], [1, 0, 0, 0], 5.0**2, fake_kfactor, bin_index, myfakealpha, - False, ), (1.0 / const_value) * (fake_kfactor[bin_index] - 1.0), ) np.testing.assert_allclose( kfactor.compute_scale_factor( - 0, [0, 0, 0, 0], [2, 0, 0, 0], 5.0**2, fake_kfactor, bin_index, myfakealpha, - False, ), (1.0 / (const_value**2)) * (fake_kfactor[bin_index] - 1.0), ) -def test_filter_k_factors(): +def test_to_list(): fakegrid = FakeGrid(3) - # This is the case in which kfactor lenght matches with number of bins + # default: kfactor length matches with number of bins + k = [1.0, 1.2, 1.3] + np.testing.assert_allclose(kfactor.to_list(fakegrid, k), k) + # kfactor length > number of bins and kfactors are all the same + # having too much kfactors is fine np.testing.assert_allclose( - kfactor.filter_k_factors(fakegrid, [1.0, 1.2, 1.3]), [1.0, 1.2, 1.3] - ) - # This is the case in which kfactor lenght > number of bins and kfactors are all the same - np.testing.assert_allclose( - kfactor.filter_k_factors(fakegrid, [1.1, 1.1, 1.1, 1.1, 1.1]), - [1.1, 1.1, 1.1, 1.1, 1.1], - ) - # This is the case in which kfactor lenght < number of bins and kfactors are all the same - np.testing.assert_allclose( - kfactor.filter_k_factors(fakegrid, [1.1, 1.1]), [1.1, 1.1, 1.1] - ) - # This is the case in which kfactor lenght < number of bins and kfactors are not all the same - np.testing.assert_allclose( - kfactor.filter_k_factors(fakegrid, [1.1, 1.3]), [0.0, 0.0, 0.0] + kfactor.to_list(fakegrid, [1.1] * 5), + [1.1] * 5, ) + # kfactor length < number of bins and kfactors are all the same + # we know how to extrapolate + np.testing.assert_allclose(kfactor.to_list(fakegrid, [1.1] * 2), [1.1] * 3) + # kfactor length < number of bins and kfactors are not all the same + np.testing.assert_allclose(kfactor.to_list(fakegrid, [1.1, 1.3]), [0.0, 0.0, 0.0]) with pytest.raises(ValueError): - # This is the case in which kfactor lenght > number of bins and kfactors are not all the same - kfactor.filter_k_factors(fakegrid, [1.1, 1.2, 1.1, 1.7, 1.1]) + # kfactor length > number of bins and kfactors are not all the same + kfactor.to_list(fakegrid, [1.1, 1.2, 1.1, 1.7, 1.1])