From 10195869dc6a0ec8337e4994684226933bfabbe5 Mon Sep 17 00:00:00 2001 From: Radonirinaunimi Date: Wed, 27 Nov 2024 01:26:52 +0100 Subject: [PATCH 1/7] Add a module that downloads HepData raw tables --- .../{rawdata => corrmat}/corrmat.corr | 0 .../commondata/LHCB_Z0_8TEV_MUON/filter.py | 8 +- .../HEPData-ins1406555-v1-Table_1.yaml | 184 ---------------- .../HEPData-ins1406555-v1-Table_2.yaml | 200 ------------------ .../filter_utils/download_hepdata.py | 63 ++++++ nnpdf_data/pyproject.toml | 3 +- 6 files changed, 70 insertions(+), 388 deletions(-) rename nnpdf_data/nnpdf_data/commondata/LHCB_Z0_8TEV_MUON/{rawdata => corrmat}/corrmat.corr (100%) delete mode 100644 nnpdf_data/nnpdf_data/commondata/LHCB_Z0_8TEV_MUON/rawdata/HEPData-ins1406555-v1-Table_1.yaml delete mode 100644 nnpdf_data/nnpdf_data/commondata/LHCB_Z0_8TEV_MUON/rawdata/HEPData-ins1406555-v1-Table_2.yaml create mode 100644 nnpdf_data/nnpdf_data/filter_utils/download_hepdata.py diff --git a/nnpdf_data/nnpdf_data/commondata/LHCB_Z0_8TEV_MUON/rawdata/corrmat.corr b/nnpdf_data/nnpdf_data/commondata/LHCB_Z0_8TEV_MUON/corrmat/corrmat.corr similarity index 100% rename from nnpdf_data/nnpdf_data/commondata/LHCB_Z0_8TEV_MUON/rawdata/corrmat.corr rename to nnpdf_data/nnpdf_data/commondata/LHCB_Z0_8TEV_MUON/corrmat/corrmat.corr diff --git a/nnpdf_data/nnpdf_data/commondata/LHCB_Z0_8TEV_MUON/filter.py b/nnpdf_data/nnpdf_data/commondata/LHCB_Z0_8TEV_MUON/filter.py index dfd9c1e824..0642847185 100644 --- a/nnpdf_data/nnpdf_data/commondata/LHCB_Z0_8TEV_MUON/filter.py +++ b/nnpdf_data/nnpdf_data/commondata/LHCB_Z0_8TEV_MUON/filter.py @@ -4,6 +4,7 @@ import pandas as pd import yaml +from nnpdf_data.filter_utils.download_hepdata import HepDataTables from nnpdf_data.filter_utils.utils import covmat_to_artunc, prettify_float yaml.add_representer(float, prettify_float) @@ -36,8 +37,8 @@ def load_yaml(table_id: int, version: int = 1) -> dict: ditionary containing the table contents """ - filename = 
f"HEPData-ins1406555-v{version}-Table_{table_id}" - table = pathlib.Path(f"./rawdata/{filename}.yaml") + foldername = f"HEPData-ins1406555-v{version}-yaml" + table = pathlib.Path(f"./{foldername}/Table{table_id}.yaml") return yaml.safe_load(table.read_text()) @@ -162,7 +163,7 @@ def read_corrmatrix(nb_datapoints: int) -> np.ndarray: """ corrmat = pd.read_csv( - "./rawdata/corrmat.corr", + "./corrmat/corrmat.corr", names=[f'{i}' for i in range(nb_datapoints)], delim_whitespace=True, ) @@ -378,4 +379,5 @@ def main_filter(boson: str = "Z") -> None: if __name__ == "__main__": + HepDataTables(metadata="./metadata.yaml").download() main_filter(boson="Z") diff --git a/nnpdf_data/nnpdf_data/commondata/LHCB_Z0_8TEV_MUON/rawdata/HEPData-ins1406555-v1-Table_1.yaml b/nnpdf_data/nnpdf_data/commondata/LHCB_Z0_8TEV_MUON/rawdata/HEPData-ins1406555-v1-Table_1.yaml deleted file mode 100644 index 0332da55b0..0000000000 --- a/nnpdf_data/nnpdf_data/commondata/LHCB_Z0_8TEV_MUON/rawdata/HEPData-ins1406555-v1-Table_1.yaml +++ /dev/null @@ -1,184 +0,0 @@ -dependent_variables: -- header: {name: '$\sigma_{W^{+} \to \mu^{+}\nu}$', units: PB} - qualifiers: - - {name: ETARAP(MU), value: 2.0-4.5} - - {name: PT(MU), units: GEV, value: '> 20'} - - {name: RE, value: P P --> W+ < MU+ NU > X} - - {name: SQRT(S), units: GeV, value: '8000.0'} - values: - - errors: - - {label: stat, symerror: 1.2} - - {label: sys, symerror: 3.2} - - {label: 'sys,beam', symerror: 2.4} - - {label: 'sys,lumi', symerror: 2.7} - value: 236.5 - - errors: - - {label: stat, symerror: 0.9} - - {label: sys, symerror: 2.2} - - {label: 'sys,beam', symerror: 2.1} - - {label: 'sys,lumi', symerror: 2.4} - value: 208.4 - - errors: - - {label: stat, symerror: 0.8} - - {label: sys, symerror: 1.8} - - {label: 'sys,beam', symerror: 1.8} - - {label: 'sys,lumi', symerror: 2.1} - value: 182.0 - - errors: - - {label: stat, symerror: 0.7} - - {label: sys, symerror: 1.6} - - {label: 'sys,beam', symerror: 1.5} - - {label: 'sys,lumi', symerror: 
1.8} - value: 153.3 - - errors: - - {label: stat, symerror: 0.6} - - {label: sys, symerror: 1.3} - - {label: 'sys,beam', symerror: 1.2} - - {label: 'sys,lumi', symerror: 1.4} - value: 119.5 - - errors: - - {label: stat, symerror: 0.5} - - {label: sys, symerror: 1.0} - - {label: 'sys,beam', symerror: 0.8} - - {label: 'sys,lumi', symerror: 1.0} - value: 84.4 - - errors: - - {label: stat, symerror: 0.5} - - {label: sys, symerror: 1.2} - - {label: 'sys,beam', symerror: 0.9} - - {label: 'sys,lumi', symerror: 1.0} - value: 86.4 - - errors: - - {label: stat, symerror: 0.4} - - {label: sys, symerror: 0.7} - - {label: 'sys,beam', symerror: 0.2} - - {label: 'sys,lumi', symerror: 0.3} - value: 23.0 -- header: {name: '$f^{W^+}_{FSR}$'} - qualifiers: - - {name: ETARAP(MU), value: 2.0-4.5} - - {name: PT(MU), units: GEV, value: '> 20'} - - {name: RE, value: ''} - - {name: SQRT(S), units: GeV, value: '8000.0'} - values: - - errors: - - {symerror: 0.0047} - value: 1.0188 - - errors: - - {symerror: 0.0028} - value: 1.0163 - - errors: - - {symerror: 0.0025} - value: 1.0158 - - errors: - - {symerror: 0.0028} - value: 1.0148 - - errors: - - {symerror: 0.0032} - value: 1.0152 - - errors: - - {symerror: 0.0046} - value: 1.015 - - errors: - - {symerror: 0.0045} - value: 1.0175 - - errors: - - {symerror: 0.0087} - value: 1.0211 -- header: {name: '$\sigma_{W^{-} \to \mu^{-}\overline{\nu}}$', units: PB} - qualifiers: - - {name: ETARAP(MU), value: 2.0-4.5} - - {name: PT(MU), units: GEV, value: '> 20'} - - {name: RE, value: P P --> W- < MU- NUBAR > X} - - {name: SQRT(S), units: GeV, value: '8000.0'} - values: - - errors: - - {label: stat, symerror: 0.9} - - {label: sys, symerror: 1.8} - - {label: 'sys,beam', symerror: 1.2} - - {label: 'sys,lumi', symerror: 1.6} - value: 134.0 - - errors: - - {label: stat, symerror: 0.7} - - {label: sys, symerror: 1.4} - - {label: 'sys,beam', symerror: 1.0} - - {label: 'sys,lumi', symerror: 1.4} - value: 119.8 - - errors: - - {label: stat, symerror: 0.6} - - 
{label: sys, symerror: 1.2} - - {label: 'sys,beam', symerror: 1.0} - - {label: 'sys,lumi', symerror: 1.3} - value: 110.6 - - errors: - - {label: stat, symerror: 0.6} - - {label: sys, symerror: 1.2} - - {label: 'sys,beam', symerror: 0.9} - - {label: 'sys,lumi', symerror: 1.2} - value: 102.4 - - errors: - - {label: stat, symerror: 0.6} - - {label: sys, symerror: 1.1} - - {label: 'sys,beam', symerror: 0.8} - - {label: 'sys,lumi', symerror: 1.1} - value: 92.5 - - errors: - - {label: stat, symerror: 0.5} - - {label: sys, symerror: 0.9} - - {label: 'sys,beam', symerror: 0.7} - - {label: 'sys,lumi', symerror: 0.9} - value: 79.9 - - errors: - - {label: stat, symerror: 0.6} - - {label: sys, symerror: 1.5} - - {label: 'sys,beam', symerror: 1.0} - - {label: 'sys,lumi', symerror: 1.4} - value: 119.3 - - errors: - - {label: stat, symerror: 0.7} - - {label: sys, symerror: 1.6} - - {label: 'sys,beam', symerror: 0.5} - - {label: 'sys,lumi', symerror: 0.7} - value: 60.0 -- header: {name: '$f^{W^-}_{FSR}$'} - qualifiers: - - {name: ETARAP(MU), value: 2.0-4.5} - - {name: PT(MU), units: GEV, value: '> 20'} - - {name: RE, value: ''} - - {name: SQRT(S), units: GeV, value: '8000.0'} - values: - - errors: - - {symerror: 0.0026} - value: 1.0172 - - errors: - - {symerror: 0.0027} - value: 1.0155 - - errors: - - {symerror: 0.0028} - value: 1.0153 - - errors: - - {symerror: 0.003} - value: 1.0162 - - errors: - - {symerror: 0.0031} - value: 1.016 - - errors: - - {symerror: 0.0033} - value: 1.0176 - - errors: - - {symerror: 0.0033} - value: 1.02 - - errors: - - {symerror: 0.0053} - value: 1.0243 -independent_variables: -- header: {name: '$\eta^{\mu}$'} - values: - - {high: 2.25, low: 2.0} - - {high: 2.5, low: 2.25} - - {high: 2.75, low: 2.5} - - {high: 3.0, low: 2.75} - - {high: 3.25, low: 3.0} - - {high: 3.5, low: 3.25} - - {high: 4.0, low: 3.5} - - {high: 4.5, low: 4.0} diff --git a/nnpdf_data/nnpdf_data/commondata/LHCB_Z0_8TEV_MUON/rawdata/HEPData-ins1406555-v1-Table_2.yaml 
b/nnpdf_data/nnpdf_data/commondata/LHCB_Z0_8TEV_MUON/rawdata/HEPData-ins1406555-v1-Table_2.yaml deleted file mode 100644 index 9a39670c3b..0000000000 --- a/nnpdf_data/nnpdf_data/commondata/LHCB_Z0_8TEV_MUON/rawdata/HEPData-ins1406555-v1-Table_2.yaml +++ /dev/null @@ -1,200 +0,0 @@ -dependent_variables: -- header: {name: '$\sigma_{Z \to \mu^{+}\mu^{-}}$', units: PB} - qualifiers: - - {name: ETARAP(MU), value: 2.0-4.5} - - {name: M(MU+ MU-), units: GeV, value: 60.0-120.0} - - {name: PT(MU), units: GEV, value: '> 20'} - - {name: RE, value: P P --> Z0 < MU+ MU- > X} - - {name: SQRT(S), units: GeV, value: '8000.0'} - values: - - errors: - - {label: stat, symerror: 0.033} - - {label: sys, symerror: 0.055} - - {label: 'sys,beam', symerror: 0.014} - - {label: 'sys,lumi', symerror: 0.014} - value: 1.223 - - errors: - - {label: stat, symerror: 0.051} - - {label: sys, symerror: 0.06} - - {label: 'sys,beam', symerror: 0.038} - - {label: 'sys,lumi', symerror: 0.038} - value: 3.263 - - errors: - - {label: stat, symerror: 0.062} - - {label: sys, symerror: 0.064} - - {label: 'sys,beam', symerror: 0.057} - - {label: 'sys,lumi', symerror: 0.058} - value: 4.983 - - errors: - - {label: stat, symerror: 0.07} - - {label: sys, symerror: 0.072} - - {label: 'sys,beam', symerror: 0.077} - - {label: 'sys,lumi', symerror: 0.078} - value: 6.719 - - errors: - - {label: stat, symerror: 0.076} - - {label: sys, symerror: 0.074} - - {label: 'sys,beam', symerror: 0.093} - - {label: 'sys,lumi', symerror: 0.094} - value: 8.051 - - errors: - - {label: stat, symerror: 0.079} - - {label: sys, symerror: 0.074} - - {label: 'sys,beam', symerror: 0.103} - - {label: 'sys,lumi', symerror: 0.105} - value: 8.967 - - errors: - - {label: stat, symerror: 0.081} - - {label: sys, symerror: 0.076} - - {label: 'sys,beam', symerror: 0.11} - - {label: 'sys,lumi', symerror: 0.112} - value: 9.561 - - errors: - - {label: stat, symerror: 0.082} - - {label: sys, symerror: 0.071} - - {label: 'sys,beam', symerror: 0.113} - - 
{label: 'sys,lumi', symerror: 0.115} - value: 9.822 - - errors: - - {label: stat, symerror: 0.081} - - {label: sys, symerror: 0.074} - - {label: 'sys,beam', symerror: 0.112} - - {label: 'sys,lumi', symerror: 0.114} - value: 9.721 - - errors: - - {label: stat, symerror: 0.078} - - {label: sys, symerror: 0.071} - - {label: 'sys,beam', symerror: 0.104} - - {label: 'sys,lumi', symerror: 0.105} - value: 9.03 - - errors: - - {label: stat, symerror: 0.072} - - {label: sys, symerror: 0.074} - - {label: 'sys,beam', symerror: 0.089} - - {label: 'sys,lumi', symerror: 0.09} - value: 7.748 - - errors: - - {label: stat, symerror: 0.063} - - {label: sys, symerror: 0.051} - - {label: 'sys,beam', symerror: 0.07} - - {label: 'sys,lumi', symerror: 0.071} - value: 6.059 - - errors: - - {label: stat, symerror: 0.054} - - {label: sys, symerror: 0.041} - - {label: 'sys,beam', symerror: 0.05} - - {label: 'sys,lumi', symerror: 0.051} - value: 4.385 - - errors: - - {label: stat, symerror: 0.042} - - {label: sys, symerror: 0.027} - - {label: 'sys,beam', symerror: 0.031} - - {label: 'sys,lumi', symerror: 0.032} - value: 2.724 - - errors: - - {label: stat, symerror: 0.032} - - {label: sys, symerror: 0.02} - - {label: 'sys,beam', symerror: 0.018} - - {label: 'sys,lumi', symerror: 0.019} - value: 1.584 - - errors: - - {label: stat, symerror: 0.022} - - {label: sys, symerror: 0.012} - - {label: 'sys,beam', symerror: 0.009} - - {label: 'sys,lumi', symerror: 0.009} - value: 0.749 - - errors: - - {label: stat, symerror: 0.016} - - {label: sys, symerror: 0.008} - - {label: 'sys,beam', symerror: 0.004} - - {label: 'sys,lumi', symerror: 0.004} - value: 0.383 - - errors: - - {label: stat, symerror: 0.003} - - {label: sys, symerror: 0.001} - - {label: 'sys,beam', symerror: 0.0} - - {label: 'sys,lumi', symerror: 0.0} - value: 0.011 -- header: {name: '$f^{Z}_{FSR}$'} - qualifiers: - - {name: ETARAP(MU), value: 2.0-4.5} - - {name: M(MU+ MU-), units: GeV, value: 60.0-120.0} - - {name: PT(MU), units: GEV, 
value: '> 20'} - - {name: RE, value: P P --> Z0 < MU+ MU- > X} - - {name: SQRT(S), units: GeV, value: '8000.0'} - values: - - errors: - - {symerror: 0.0395} - value: 1.0466 - - errors: - - {symerror: 0.0119} - value: 1.0305 - - errors: - - {symerror: 0.0069} - value: 1.0277 - - errors: - - {symerror: 0.0061} - value: 1.0252 - - errors: - - {symerror: 0.0048} - value: 1.0264 - - errors: - - {symerror: 0.0032} - value: 1.0257 - - errors: - - {symerror: 0.0038} - value: 1.0258 - - errors: - - {symerror: 0.0027} - value: 1.0252 - - errors: - - {symerror: 0.0035} - value: 1.0282 - - errors: - - {symerror: 0.003} - value: 1.0264 - - errors: - - {symerror: 0.0066} - value: 1.0261 - - errors: - - {symerror: 0.004} - value: 1.0248 - - errors: - - {symerror: 0.006} - value: 1.0258 - - errors: - - {symerror: 0.0053} - value: 1.0228 - - errors: - - {symerror: 0.0079} - value: 1.018 - - errors: - - {symerror: 0.01} - value: 1.0207 - - errors: - - {symerror: 0.014} - value: 1.0183 - - errors: - - {symerror: 0.0761} - value: 1.0177 -independent_variables: -- header: {name: '$y_{Z}$'} - values: - - {high: 2.125, low: 2.0} - - {high: 2.25, low: 2.125} - - {high: 2.375, low: 2.25} - - {high: 2.5, low: 2.375} - - {high: 2.625, low: 2.5} - - {high: 2.75, low: 2.625} - - {high: 2.875, low: 2.75} - - {high: 3.0, low: 2.875} - - {high: 3.125, low: 3.0} - - {high: 3.25, low: 3.125} - - {high: 3.375, low: 3.25} - - {high: 3.5, low: 3.375} - - {high: 3.625, low: 3.5} - - {high: 3.75, low: 3.625} - - {high: 3.875, low: 3.75} - - {high: 4.0, low: 3.875} - - {high: 4.25, low: 4.0} - - {high: 4.5, low: 4.25} diff --git a/nnpdf_data/nnpdf_data/filter_utils/download_hepdata.py b/nnpdf_data/nnpdf_data/filter_utils/download_hepdata.py new file mode 100644 index 0000000000..b6451470b7 --- /dev/null +++ b/nnpdf_data/nnpdf_data/filter_utils/download_hepdata.py @@ -0,0 +1,63 @@ +""" +A module to download the HepData tables whenever the commondata +need to be regenerated. 
As a result, raw data tables are no longer +stored in the repository. It relies on the HepData API for the +downloading of the tables: + +https://github.com/HEPData/hepdata-cli/tree/main + +TODO: If downloading the raw tables separately for each dataset +turns out to be very slow, a possibility would be to download +all the raw data tables at once. +""" + +import os +import pathlib +import tarfile + +from hepdata_cli.api import Client, getFilename_fromCd +from hepdata_cli.resilient_requests import resilient_requests +from yaml import safe_load + +CLIENT = Client(verbose=True) + + +class HepDataTables: + """A commondata class to download the raw HepData tables. + + Parameters: + ----------- + metadata: str + a path the metadata of the current data + """ + + def __init__(self, metadata: str) -> None: + self.metadata = safe_load(pathlib.Path(metadata).read_text()) + + def get_hepdata_url(self) -> str: + # TODO: possibly Check the metadata vs HepData versions here + hepdata_id = self.metadata["hepdata"]["url"].split("/")[-1] + # NOTE: `id_list` can support many IDs allowing for downloading + # mutiple datasets at the same time.
+ urls = CLIENT._build_urls( + id_list=hepdata_id, file_format='yaml', ids='hepdata', table_name='' + ) + return urls[0] + + def download(self) -> None: + url = self.get_hepdata_url() + print(f"Downloading tables from: {url}") + response = resilient_requests('get', url, allow_redirects=True) + + filename = getFilename_fromCd(response.headers.get('content-disposition')) + filepath = pathlib.Path().parent + print(filepath) + # filepath.mkdir(exist_ok=True) + raw_path = f"{filepath}/{filename}" + + open(raw_path, 'wb').write(response.content) + with tarfile.open(raw_path, "r:gz" if raw_path.endswith("tar.gz") else "r:") as tar: + tar.extractall(path=pathlib.Path(filepath)) + os.remove(raw_path) + + return diff --git a/nnpdf_data/pyproject.toml b/nnpdf_data/pyproject.toml index 68e272ccb7..a171ec7cde 100644 --- a/nnpdf_data/pyproject.toml +++ b/nnpdf_data/pyproject.toml @@ -38,11 +38,12 @@ python = "^3.9" pandas = {version = "*", optional = true} numpy = {version = "*", optional = true} scipy = {version = "*", optional = true} +hepdata_cli = {version = "*", optional = true} openpyxl = {version = "*", optional = true} uproot = {version = "*", optional = true} [tool.poetry.extras] -filter = ["openpyxl", "numpy", "scipy", "pandas", "uproot"] +filter = ["openpyxl", "numpy", "scipy", "pandas", "hepdata_cli", "uproot"] [tool.poetry-dynamic-versioning] enable = true From 37cace11415e3a92bdcc7b143a71951b73f8ef5c Mon Sep 17 00:00:00 2001 From: Radonirinaunimi Date: Wed, 27 Nov 2024 16:22:09 +0100 Subject: [PATCH 2/7] Swap module for a script and download all raw tables at once --- .../filter_utils/download_hepdata.py | 86 +++++++++++++++---- nnpdf_data/pyproject.toml | 3 + 2 files changed, 71 insertions(+), 18 deletions(-) diff --git a/nnpdf_data/nnpdf_data/filter_utils/download_hepdata.py b/nnpdf_data/nnpdf_data/filter_utils/download_hepdata.py index b6451470b7..20ed2b7d92 100644 --- a/nnpdf_data/nnpdf_data/filter_utils/download_hepdata.py +++ 
b/nnpdf_data/nnpdf_data/filter_utils/download_hepdata.py @@ -14,45 +14,59 @@ import os import pathlib import tarfile +from typing import Generator from hepdata_cli.api import Client, getFilename_fromCd from hepdata_cli.resilient_requests import resilient_requests from yaml import safe_load CLIENT = Client(verbose=True) +COMMONDATA_PATH = pathlib.Path(__file__).parents[1] class HepDataTables: """A commondata class to download the raw HepData tables. - Parameters: - ----------- + Parameters + ---------- metadata: str a path the metadata of the current data """ - def __init__(self, metadata: str) -> None: - self.metadata = safe_load(pathlib.Path(metadata).read_text()) + def __init__(self) -> None: + self.metadata_files = self._get_metadata_files() - def get_hepdata_url(self) -> str: - # TODO: possibly Check the metadata vs HepData versions here - hepdata_id = self.metadata["hepdata"]["url"].split("/")[-1] - # NOTE: `id_list` can support many IDs allowing for downloading - # mutiple datasets at the same time. - urls = CLIENT._build_urls( - id_list=hepdata_id, file_format='yaml', ids='hepdata', table_name='' - ) - return urls[0] + @staticmethod + def _get_metadata_files() -> Generator: + return COMMONDATA_PATH.glob("**/metadata.yaml") + + @staticmethod + def _get_hepdata_id(metadata_file: pathlib.Path) -> str | None: + """Get the HepData ID from the metadata for a given dataset. 
+ + Parameters + ---------- + metadata_file: pathlib.Path + path to the metadata file - def download(self) -> None: - url = self.get_hepdata_url() + Returns + ------- + str | None: returns the HepData ID if the dataset is on HepData + """ + metadata_yaml = safe_load(pathlib.Path(metadata_file).read_text()) + if "hepdata" in metadata_yaml.keys(): + url = metadata_yaml["hepdata"].get("url", None) + return url.split("/")[-1] if url is not None else None + else: + return None + + @staticmethod + def _download_data(url: str, dataset_name: str) -> None: print(f"Downloading tables from: {url}") response = resilient_requests('get', url, allow_redirects=True) filename = getFilename_fromCd(response.headers.get('content-disposition')) - filepath = pathlib.Path().parent - print(filepath) - # filepath.mkdir(exist_ok=True) + filepath = COMMONDATA_PATH.joinpath(f"commondata/{dataset_name}") raw_path = f"{filepath}/{filename}" open(raw_path, 'wb').write(response.content) @@ -61,3 +75,39 @@ def download(self) -> None: os.remove(raw_path) return + + def get_hepdata_urls(self) -> tuple[list[str], list[str]]: + id_list = [] + dataset = [] + for meta in self.metadata_files: + hep_id = self._get_hepdata_id(metadata_file=meta) + if hep_id is not None and hep_id.startswith("ins"): + id_list.append(hep_id) + dataset.append(str(meta).split("/")[-2]) + hepdata_id = " ".join(id_list) + + urls = CLIENT._build_urls( + id_list=hepdata_id, file_format='yaml', ids='hepdata', table_name='' + ) + return urls, dataset + + def download_all(self) -> None: + urls, dataset_names = self.get_hepdata_urls() + print("Finished fetching all the URLs.") + for url, dataname in zip(urls, dataset_names): + print(f"Downloading: {dataname}") + self._download_data(url=url, dataset_name=dataname) + print("Raw data tables for all the datasets have been downloaded.") + return + + def download_from_server(self) -> None: + """Download all of the datasets from the NNPDF server.""" + return + + def compress_raw_data(self) 
-> None: + """Compress the raw data tables to be uploaded to the NNPDF server.""" + return + + +def main(): + HepDataTables().download_all() diff --git a/nnpdf_data/pyproject.toml b/nnpdf_data/pyproject.toml index a171ec7cde..d8996915c8 100644 --- a/nnpdf_data/pyproject.toml +++ b/nnpdf_data/pyproject.toml @@ -31,6 +31,9 @@ include = [ "nnpdf_data/_version.py", ] +[tool.poetry.scripts] +download-hepdata = "nnpdf_data.filter_utils.download_hepdata:main" + [tool.poetry.dependencies] python = "^3.9" "ruamel.yaml" = "<0.18" From c3fc56f007dd6221898764a4772f2655b39ffde7 Mon Sep 17 00:00:00 2001 From: Radonirinaunimi Date: Fri, 29 Nov 2024 15:10:33 +0100 Subject: [PATCH 3/7] Fix `version` in metadata of some datasets --- nnpdf_data/nnpdf_data/commondata/CDF_Z0_1P96TEV/metadata.yaml | 1 + .../nnpdf_data/commondata/CMS_1JET_13TEV_DIF/metadata.yaml | 2 +- nnpdf_data/nnpdf_data/commondata/STAR_WMWP_510GEV/metadata.yaml | 1 + 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/nnpdf_data/nnpdf_data/commondata/CDF_Z0_1P96TEV/metadata.yaml b/nnpdf_data/nnpdf_data/commondata/CDF_Z0_1P96TEV/metadata.yaml index 9960e3dcd2..f15c1099d0 100644 --- a/nnpdf_data/nnpdf_data/commondata/CDF_Z0_1P96TEV/metadata.yaml +++ b/nnpdf_data/nnpdf_data/commondata/CDF_Z0_1P96TEV/metadata.yaml @@ -11,6 +11,7 @@ iNSPIRE: url: https://inspirehep.net/literature/856131 hepdata: url: https://www.hepdata.net/record/ins856131 + version: 1 implemented_observables: - observable_name: ZRAP observable: diff --git a/nnpdf_data/nnpdf_data/commondata/CMS_1JET_13TEV_DIF/metadata.yaml b/nnpdf_data/nnpdf_data/commondata/CMS_1JET_13TEV_DIF/metadata.yaml index cc56da4e30..52ef6fae0d 100644 --- a/nnpdf_data/nnpdf_data/commondata/CMS_1JET_13TEV_DIF/metadata.yaml +++ b/nnpdf_data/nnpdf_data/commondata/CMS_1JET_13TEV_DIF/metadata.yaml @@ -10,7 +10,7 @@ iNSPIRE: url: "https://inspirehep.net/literature/1972986" hepdata: url: "https://www.hepdata.net/record/ins1972986" - version: 1 + version: 2 version: 1 
version_comment: "NA" diff --git a/nnpdf_data/nnpdf_data/commondata/STAR_WMWP_510GEV/metadata.yaml b/nnpdf_data/nnpdf_data/commondata/STAR_WMWP_510GEV/metadata.yaml index 384e702a15..71cf84ef25 100644 --- a/nnpdf_data/nnpdf_data/commondata/STAR_WMWP_510GEV/metadata.yaml +++ b/nnpdf_data/nnpdf_data/commondata/STAR_WMWP_510GEV/metadata.yaml @@ -11,6 +11,7 @@ iNSPIRE: url: "https://inspirehep.net/literature/1708793" hepdata: url: "https://www.hepdata.net/record/ins1708793" + version: 1 nnpdf_metadata: nnpdf31_process: "DY CC" From 8d9534f98694e700cd61c8fea465b3b74437b8af Mon Sep 17 00:00:00 2001 From: Radonirinaunimi Date: Fri, 29 Nov 2024 15:12:49 +0100 Subject: [PATCH 4/7] Add method to compress the raw hepdata tables --- .../LHCB_Z0_8TEV_MUON/corrmat/corrmat.corr | 34 ------------------- .../commondata/LHCB_Z0_8TEV_MUON/filter.py | 4 +-- .../filter_utils/download_hepdata.py | 29 ++++++++++++---- 3 files changed, 23 insertions(+), 44 deletions(-) delete mode 100644 nnpdf_data/nnpdf_data/commondata/LHCB_Z0_8TEV_MUON/corrmat/corrmat.corr diff --git a/nnpdf_data/nnpdf_data/commondata/LHCB_Z0_8TEV_MUON/corrmat/corrmat.corr b/nnpdf_data/nnpdf_data/commondata/LHCB_Z0_8TEV_MUON/corrmat/corrmat.corr deleted file mode 100644 index e611042482..0000000000 --- a/nnpdf_data/nnpdf_data/commondata/LHCB_Z0_8TEV_MUON/corrmat/corrmat.corr +++ /dev/null @@ -1,34 +0,0 @@ -1.0 -0.19 1.0 -0.17 0.27 1.0 -0.16 0.26 0.28 1.0 -0.16 0.25 0.28 0.29 1.0 -0.15 0.24 0.27 0.29 0.3 1.0 -0.14 0.23 0.26 0.28 0.29 0.3 1.0 -0.14 0.21 0.25 0.27 0.29 0.3 0.3 1.0 -0.13 0.2 0.23 0.25 0.27 0.28 0.29 0.29 1.0 -0.11 0.17 0.2 0.23 0.25 0.26 0.27 0.28 0.27 1.0 -0.09 0.14 0.16 0.18 0.2 0.22 0.22 0.23 0.23 0.23 1.0 -0.08 0.12 0.15 0.17 0.19 0.2 0.21 0.22 0.22 0.22 0.2 1.0 -0.07 0.1 0.12 0.14 0.16 0.17 0.18 0.19 0.19 0.2 0.18 0.19 1.0 -0.06 0.08 0.1 0.11 0.13 0.14 0.15 0.16 0.16 0.17 0.16 0.16 0.15 1.0 -0.05 0.07 0.08 0.09 0.1 0.11 0.11 0.12 0.13 0.13 0.12 0.13 0.12 0.11 1.0 -0.03 0.05 0.06 0.06 0.07 0.08 0.08 
0.09 0.09 0.1 0.09 0.1 0.09 0.08 0.07 1.0 -0.03 0.04 0.04 0.05 0.05 0.06 0.06 0.06 0.07 0.07 0.07 0.08 0.07 0.07 0.06 0.05 1.0 -0.01 0.01 0.01 0.01 0.01 0.01 0.01 0.01 0.01 0.01 0.01 0.02 0.02 0.01 0.01 0.01 0.01 1.0 -0.23 0.3 0.28 0.27 0.26 0.25 0.24 0.23 0.21 0.18 0.15 0.13 0.11 0.1 0.07 0.05 0.04 0.01 1.0 -0.05 0.15 0.21 0.2 0.2 0.2 0.2 0.19 0.18 0.16 0.14 0.12 0.1 0.08 0.06 0.04 0.03 0.01 0.2 1.0 -0.04 0.07 0.12 0.15 0.16 0.17 0.17 0.17 0.16 0.15 0.13 0.13 0.11 0.08 0.06 0.05 0.03 0.01 0.13 0.12 1.0 -0.05 0.08 0.1 0.13 0.16 0.17 0.17 0.17 0.16 0.16 0.14 0.13 0.12 0.09 0.07 0.05 0.03 0.01 0.06 0.03 0.26 1.0 -0.06 0.08 0.1 0.11 0.14 0.16 0.17 0.17 0.16 0.16 0.14 0.14 0.12 0.1 0.07 0.05 0.03 0.01 0.07 0.03 0.25 0.33 1.0 -0.04 0.06 0.07 0.09 0.1 0.11 0.12 0.13 0.13 0.12 0.11 0.11 0.09 0.08 0.06 0.04 0.03 0.0 0.03 -0.01 0.28 0.35 0.34 1.0 -0.04 0.06 0.07 0.08 0.09 0.1 0.11 0.12 0.12 0.12 0.11 0.11 0.1 0.09 0.07 0.05 0.04 0.0 0.0 -0.06 0.31 0.41 0.4 0.45 1.0 -0.02 0.03 0.04 0.04 0.04 0.04 0.05 0.05 0.06 0.06 0.06 0.07 0.06 0.06 0.05 0.04 0.04 0.01 -0.07 -0.14 0.14 0.32 0.32 0.35 0.40 1.0 -0.21 0.28 0.26 0.25 0.24 0.24 0.22 0.21 0.2 0.17 0.14 0.12 0.11 0.09 0.07 0.05 0.04 0.01 0.67 0.10 0.24 0.22 0.22 0.23 0.26 0.14 1.0 -0.04 0.14 0.19 0.18 0.18 0.18 0.18 0.17 0.16 0.15 0.12 0.11 0.09 0.07 0.05 0.04 0.03 0.0 0.07 0.54 0.23 0.28 0.28 0.28 0.33 0.24 0.21 1.0 -0.04 0.07 0.11 0.14 0.15 0.15 0.15 0.15 0.15 0.14 0.12 0.12 0.1 0.08 0.06 0.04 0.03 0.01 0.05 0.03 0.64 0.27 0.26 0.27 0.32 0.23 0.18 0.22 1.0 -0.05 0.07 0.09 0.12 0.14 0.15 0.15 0.16 0.15 0.14 0.12 0.12 0.11 0.08 0.06 0.04 0.03 0.01 0.04 0.00 0.25 0.70 0.30 0.33 0.39 0.29 0.21 0.25 0.31 1.0 -0.05 0.08 0.09 0.1 0.13 0.15 0.15 0.16 0.15 0.15 0.13 0.13 0.11 0.09 0.07 0.05 0.03 0.01 0.06 0.03 0.26 0.32 0.68 0.32 0.38 0.26 0.22 0.28 0.27 0.32 1.0 -0.04 0.06 0.08 0.09 0.1 0.12 0.13 0.13 0.13 0.13 0.11 0.11 0.1 0.08 0.06 0.04 0.03 0.0 0.07 0.04 0.30 0.28 0.27 0.63 0.36 0.22 0.23 0.23 0.29 0.32 0.28 1.0 -0.04 0.06 0.08 
0.09 0.1 0.11 0.12 0.13 0.14 0.14 0.12 0.12 0.11 0.1 0.08 0.06 0.04 0.01 0.14 0.20 -0.13 -0.08 -0.07 -0.17 -0.04 -0.15 -0.06 -0.04 -0.04 -0.11 -0.07 -0.19 1.0 -0.03 0.04 0.04 0.05 0.05 0.06 0.06 0.06 0.07 0.07 0.07 0.08 0.08 0.07 0.06 0.05 0.04 0.01 0.12 0.17 -0.18 -0.17 -0.15 -0.23 -0.31 0.05 -0.09 -0.11 -0.14 -0.19 -0.18 -0.24 0.48 1.0 diff --git a/nnpdf_data/nnpdf_data/commondata/LHCB_Z0_8TEV_MUON/filter.py b/nnpdf_data/nnpdf_data/commondata/LHCB_Z0_8TEV_MUON/filter.py index 0642847185..83eaaa249b 100644 --- a/nnpdf_data/nnpdf_data/commondata/LHCB_Z0_8TEV_MUON/filter.py +++ b/nnpdf_data/nnpdf_data/commondata/LHCB_Z0_8TEV_MUON/filter.py @@ -4,7 +4,6 @@ import pandas as pd import yaml -from nnpdf_data.filter_utils.download_hepdata import HepDataTables from nnpdf_data.filter_utils.utils import covmat_to_artunc, prettify_float yaml.add_representer(float, prettify_float) @@ -163,7 +162,7 @@ def read_corrmatrix(nb_datapoints: int) -> np.ndarray: """ corrmat = pd.read_csv( - "./corrmat/corrmat.corr", + "./HEPData-ins1406555-v1-yaml/corrmat.corr", names=[f'{i}' for i in range(nb_datapoints)], delim_whitespace=True, ) @@ -379,5 +378,4 @@ def main_filter(boson: str = "Z") -> None: if __name__ == "__main__": - HepDataTables(metadata="./metadata.yaml").download() main_filter(boson="Z") diff --git a/nnpdf_data/nnpdf_data/filter_utils/download_hepdata.py b/nnpdf_data/nnpdf_data/filter_utils/download_hepdata.py index 20ed2b7d92..da7369d6c1 100644 --- a/nnpdf_data/nnpdf_data/filter_utils/download_hepdata.py +++ b/nnpdf_data/nnpdf_data/filter_utils/download_hepdata.py @@ -14,7 +14,7 @@ import os import pathlib import tarfile -from typing import Generator +from typing import Generator, Tuple from hepdata_cli.api import Client, getFilename_fromCd from hepdata_cli.resilient_requests import resilient_requests @@ -41,8 +41,8 @@ def _get_metadata_files() -> Generator: return COMMONDATA_PATH.glob("**/metadata.yaml") @staticmethod - def _get_hepdata_id(metadata_file: pathlib.Path) -> 
str | None: - """Get the HepData ID from the metadata for a given dataset. + def _get_hepdata_id(metadata_file: pathlib.Path) -> Tuple[int | None, str | None]: + """Get the HepData ID and version from the metadata for a given dataset. Parameters ---------- @@ -56,9 +56,11 @@ def _get_hepdata_id(metadata_file: pathlib.Path) -> str | None: metadata_yaml = safe_load(pathlib.Path(metadata_file).read_text()) if "hepdata" in metadata_yaml.keys(): url = metadata_yaml["hepdata"].get("url", None) - return url.split("/")[-1] if url is not None else None + version = metadata_yaml["hepdata"].get("version", None) + hepid = url.split("/")[-1] if url is not None else None + return version, hepid else: - return None + return None, None @staticmethod def _download_data(url: str, dataset_name: str) -> None: @@ -77,13 +79,15 @@ def _download_data(url: str, dataset_name: str) -> None: return def get_hepdata_urls(self) -> tuple[list[str], list[str]]: + # TODO: check metadata version with HepData version id_list = [] dataset = [] for meta in self.metadata_files: - hep_id = self._get_hepdata_id(metadata_file=meta) + _, hep_id = self._get_hepdata_id(metadata_file=meta) if hep_id is not None and hep_id.startswith("ins"): + data_name = str(meta).split("/")[-2] id_list.append(hep_id) - dataset.append(str(meta).split("/")[-2]) + dataset.append(data_name) hepdata_id = " ".join(id_list) urls = CLIENT._build_urls( @@ -106,8 +110,19 @@ def download_from_server(self) -> None: def compress_raw_data(self) -> None: """Compress the raw data tables to be uploaded to the NNPDF server.""" + path_compressed = f"{COMMONDATA_PATH.parent}/compressed_rawdata.tar.gz" + with tarfile.open(path_compressed, "w:gz") as tar: + for meta in self.metadata_files: + version, hep_id = self._get_hepdata_id(metadata_file=meta) + if hep_id is not None and hep_id.startswith("ins"): + data_path = meta.parent + folder_path = pathlib.Path(f"{data_path}/HEPData-{hep_id}-v{version}-yaml") + if not folder_path.is_dir(): + raise 
ValueError(f"{folder_path} is not a valid path!") + tar.add(folder_path) return def main(): HepDataTables().download_all() + # HepDataTables().compress_raw_data() From f1fba8bbb37099acbe4958b26e98d6ea197509cd Mon Sep 17 00:00:00 2001 From: Radonirinaunimi Date: Fri, 29 Nov 2024 20:27:56 +0100 Subject: [PATCH 5/7] Fix `LHCb inclusive DY` --- .../corrmat.corr | 0 .../commondata/LHCB_WPWM_7TEV_MUON/filter.py | 12 +- .../HEPData-ins1373300-v1-Table_1.yaml | 193 ----------------- .../HEPData-ins1373300-v1-Table_4.yaml | 184 ---------------- .../corrmat.corr | 0 .../commondata/LHCB_WPWM_8TEV_MUON/filter.py | 14 +- .../HEPData-ins1406555-v1-Table_1.yaml | 184 ---------------- .../HEPData-ins1406555-v1-Table_2.yaml | 200 ------------------ .../corrmata.corr | 0 .../corrmatb.corr | 0 .../commondata/LHCB_Z0_13TEV/filter.py | 19 +- .../LHCB_Z0_13TEV/rawdata/Table_2a.yaml | 104 --------- .../LHCB_Z0_13TEV/rawdata/Table_2b.yaml | 102 --------- .../LHCB_Z0_13TEV/rawdata/Table_5a.yaml | 120 ----------- .../LHCB_Z0_13TEV/rawdata/Table_5b.yaml | 116 ---------- .../corrmat.corr | 0 .../LHCB_Z0_7TEV_DIELECTRON/filter.py | 15 +- .../rawdata/Table2.yaml | 85 -------- .../corrmat.corr | 0 .../commondata/LHCB_Z0_7TEV_MUON/filter.py | 12 +- .../HEPData-ins1373300-v1-Table_1.yaml | 193 ----------------- .../HEPData-ins1373300-v1-Table_4.yaml | 184 ---------------- .../corrmat.corr | 0 .../LHCB_Z0_8TEV_DIELECTRON/filter.py | 19 +- .../rawdata/Table2.yaml | 190 ----------------- .../HEPData-ins1406555-v1-yaml/corrmat.corr | 34 +++ .../commondata/LHCB_Z0_8TEV_MUON/filter.py | 10 +- 27 files changed, 95 insertions(+), 1895 deletions(-) rename nnpdf_data/nnpdf_data/commondata/LHCB_WPWM_7TEV_MUON/{rawdata => HEPData-ins1373300-v1-yaml}/corrmat.corr (100%) delete mode 100644 nnpdf_data/nnpdf_data/commondata/LHCB_WPWM_7TEV_MUON/rawdata/HEPData-ins1373300-v1-Table_1.yaml delete mode 100644 nnpdf_data/nnpdf_data/commondata/LHCB_WPWM_7TEV_MUON/rawdata/HEPData-ins1373300-v1-Table_4.yaml rename 
nnpdf_data/nnpdf_data/commondata/LHCB_WPWM_8TEV_MUON/{rawdata => HEPData-ins1406555-v1-yaml}/corrmat.corr (100%) delete mode 100644 nnpdf_data/nnpdf_data/commondata/LHCB_WPWM_8TEV_MUON/rawdata/HEPData-ins1406555-v1-Table_1.yaml delete mode 100644 nnpdf_data/nnpdf_data/commondata/LHCB_WPWM_8TEV_MUON/rawdata/HEPData-ins1406555-v1-Table_2.yaml rename nnpdf_data/nnpdf_data/commondata/LHCB_Z0_13TEV/{rawdata => HEPData-ins1477581-v1-yaml}/corrmata.corr (100%) rename nnpdf_data/nnpdf_data/commondata/LHCB_Z0_13TEV/{rawdata => HEPData-ins1477581-v1-yaml}/corrmatb.corr (100%) delete mode 100644 nnpdf_data/nnpdf_data/commondata/LHCB_Z0_13TEV/rawdata/Table_2a.yaml delete mode 100644 nnpdf_data/nnpdf_data/commondata/LHCB_Z0_13TEV/rawdata/Table_2b.yaml delete mode 100644 nnpdf_data/nnpdf_data/commondata/LHCB_Z0_13TEV/rawdata/Table_5a.yaml delete mode 100644 nnpdf_data/nnpdf_data/commondata/LHCB_Z0_13TEV/rawdata/Table_5b.yaml rename nnpdf_data/nnpdf_data/commondata/LHCB_Z0_7TEV_DIELECTRON/{rawdata => HEPData-ins1208102-v1-yaml}/corrmat.corr (100%) delete mode 100644 nnpdf_data/nnpdf_data/commondata/LHCB_Z0_7TEV_DIELECTRON/rawdata/Table2.yaml rename nnpdf_data/nnpdf_data/commondata/LHCB_Z0_7TEV_MUON/{rawdata => HEPData-ins1373300-v1-yaml}/corrmat.corr (100%) delete mode 100644 nnpdf_data/nnpdf_data/commondata/LHCB_Z0_7TEV_MUON/rawdata/HEPData-ins1373300-v1-Table_1.yaml delete mode 100644 nnpdf_data/nnpdf_data/commondata/LHCB_Z0_7TEV_MUON/rawdata/HEPData-ins1373300-v1-Table_4.yaml rename nnpdf_data/nnpdf_data/commondata/LHCB_Z0_8TEV_DIELECTRON/{rawdata => HEPData-ins1347133-v1-yaml}/corrmat.corr (100%) delete mode 100644 nnpdf_data/nnpdf_data/commondata/LHCB_Z0_8TEV_DIELECTRON/rawdata/Table2.yaml create mode 100644 nnpdf_data/nnpdf_data/commondata/LHCB_Z0_8TEV_MUON/HEPData-ins1406555-v1-yaml/corrmat.corr diff --git a/nnpdf_data/nnpdf_data/commondata/LHCB_WPWM_7TEV_MUON/rawdata/corrmat.corr 
b/nnpdf_data/nnpdf_data/commondata/LHCB_WPWM_7TEV_MUON/HEPData-ins1373300-v1-yaml/corrmat.corr similarity index 100% rename from nnpdf_data/nnpdf_data/commondata/LHCB_WPWM_7TEV_MUON/rawdata/corrmat.corr rename to nnpdf_data/nnpdf_data/commondata/LHCB_WPWM_7TEV_MUON/HEPData-ins1373300-v1-yaml/corrmat.corr diff --git a/nnpdf_data/nnpdf_data/commondata/LHCB_WPWM_7TEV_MUON/filter.py b/nnpdf_data/nnpdf_data/commondata/LHCB_WPWM_7TEV_MUON/filter.py index e4b0597ebd..50ab8b1118 100644 --- a/nnpdf_data/nnpdf_data/commondata/LHCB_WPWM_7TEV_MUON/filter.py +++ b/nnpdf_data/nnpdf_data/commondata/LHCB_WPWM_7TEV_MUON/filter.py @@ -36,8 +36,8 @@ def load_yaml(table_id: int, version: int = 1) -> dict: ditionary containing the table contents """ - filename = f"HEPData-ins1373300-v{version}-Table_{table_id}" - table = pathlib.Path(f"./rawdata/{filename}.yaml") + foldername = f"HEPData-ins1373300-v{version}-yaml" + table = pathlib.Path(f"./{foldername}/Table{table_id}.yaml") return yaml.safe_load(table.read_text()) @@ -147,13 +147,15 @@ def get_errors(hepdata: dict, bin_index: list, indx: int = 0) -> dict: return {"stat": stat, "sys_corr": sys_corr, "sys_beam": sys_beam, "sys_lumi": sys_lumi} -def read_corrmatrix(nb_datapoints: int) -> np.ndarray: +def read_corrmatrix(nb_datapoints: int, version: int = 1) -> np.ndarray: """Read the matrix and returns a symmetrized verions. 
Parameters ---------- nb_datapoints: int total number of datapoints + version: int + HepData version Returns ------- @@ -162,7 +164,7 @@ def read_corrmatrix(nb_datapoints: int) -> np.ndarray: """ corrmat = pd.read_csv( - "./rawdata/corrmat.corr", + f"./HEPData-ins1373300-v{version}-yaml/corrmat.corr", names=[f'{i}' for i in range(nb_datapoints)], delim_whitespace=True, ) @@ -366,7 +368,7 @@ def main_filter(boson: str = "Z") -> None: errors_combined = concatenate_dicts(combined_errors) # Compute the Artifical Systematics from CovMat - corrmat = read_corrmatrix(nb_datapoints=nbpoints) + corrmat = read_corrmatrix(nb_datapoints=nbpoints, version=version) covmat = multiply_syst(corrmat, errors_combined["sys_corr"]) artunc = generate_artificial_unc(ndata=nbpoints, covmat_list=covmat.tolist(), no_of_norm_mat=0) errors = format_uncertainties(errors_combined, artunc, bslice) diff --git a/nnpdf_data/nnpdf_data/commondata/LHCB_WPWM_7TEV_MUON/rawdata/HEPData-ins1373300-v1-Table_1.yaml b/nnpdf_data/nnpdf_data/commondata/LHCB_WPWM_7TEV_MUON/rawdata/HEPData-ins1373300-v1-Table_1.yaml deleted file mode 100644 index dd3e29fb37..0000000000 --- a/nnpdf_data/nnpdf_data/commondata/LHCB_WPWM_7TEV_MUON/rawdata/HEPData-ins1373300-v1-Table_1.yaml +++ /dev/null @@ -1,193 +0,0 @@ -dependent_variables: -- header: {name: '$\sigma_{Z \to \mu^{+}\mu^{-}}$', units: PB} - qualifiers: - - {name: ETARAP(MU), value: 2.0-4.5} - - {name: M(MU+ MU-), units: GeV, value: 60.0-120.0} - - {name: PT(MU), units: GEV, value: '> 20'} - - {name: RE, value: P P --> Z0 < MU+ MU- > X} - - {name: SQRT(S), units: GeV, value: '7000.0'} - values: - - errors: - - {label: stat, symerror: 0.039} - - {label: sys, symerror: 0.032} - - {label: 'sys,beam', symerror: 0.012} - - {label: 'sys,lumi', symerror: 0.017} - value: 0.969 - - errors: - - {label: stat, symerror: 0.063} - - {label: sys, symerror: 0.05} - - {label: 'sys,beam', symerror: 0.036} - - {label: 'sys,lumi', symerror: 0.049} - value: 2.84 - - errors: - - {label: 
stat, symerror: 0.077} - - {label: sys, symerror: 0.078} - - {label: 'sys,beam', symerror: 0.055} - - {label: 'sys,lumi', symerror: 0.076} - value: 4.428 - - errors: - - {label: stat, symerror: 0.088} - - {label: sys, symerror: 0.06} - - {label: 'sys,beam', symerror: 0.073} - - {label: 'sys,lumi', symerror: 0.1} - value: 5.823 - - errors: - - {label: stat, symerror: 0.095} - - {label: sys, symerror: 0.068} - - {label: 'sys,beam', symerror: 0.086} - - {label: 'sys,lumi', symerror: 0.118} - value: 6.877 - - errors: - - {label: stat, symerror: 0.1} - - {label: sys, symerror: 0.069} - - {label: 'sys,beam', symerror: 0.096} - - {label: 'sys,lumi', symerror: 0.132} - value: 7.669 - - errors: - - {label: stat, symerror: 0.104} - - {label: sys, symerror: 0.07} - - {label: 'sys,beam', symerror: 0.104} - - {label: 'sys,lumi', symerror: 0.143} - value: 8.306 - - errors: - - {label: stat, symerror: 0.103} - - {label: sys, symerror: 0.066} - - {label: 'sys,beam', symerror: 0.103} - - {label: 'sys,lumi', symerror: 0.142} - value: 8.241 - - errors: - - {label: stat, symerror: 0.099} - - {label: sys, symerror: 0.059} - - {label: 'sys,beam', symerror: 0.097} - - {label: 'sys,lumi', symerror: 0.134} - value: 7.783 - - errors: - - {label: stat, symerror: 0.096} - - {label: sys, symerror: 0.058} - - {label: 'sys,beam', symerror: 0.089} - - {label: 'sys,lumi', symerror: 0.122} - value: 7.094 - - errors: - - {label: stat, symerror: 0.087} - - {label: sys, symerror: 0.049} - - {label: 'sys,beam', symerror: 0.074} - - {label: 'sys,lumi', symerror: 0.101} - value: 5.894 - - errors: - - {label: stat, symerror: 0.073} - - {label: sys, symerror: 0.041} - - {label: 'sys,beam', symerror: 0.052} - - {label: 'sys,lumi', symerror: 0.072} - value: 4.16 - - errors: - - {label: stat, symerror: 0.061} - - {label: sys, symerror: 0.03} - - {label: 'sys,beam', symerror: 0.036} - - {label: 'sys,lumi', symerror: 0.05} - value: 2.896 - - errors: - - {label: stat, symerror: 0.047} - - {label: sys, symerror: 
0.023} - - {label: 'sys,beam', symerror: 0.022} - - {label: 'sys,lumi', symerror: 0.03} - value: 1.741 - - errors: - - {label: stat, symerror: 0.032} - - {label: sys, symerror: 0.014} - - {label: 'sys,beam', symerror: 0.01} - - {label: 'sys,lumi', symerror: 0.014} - value: 0.825 - - errors: - - {label: stat, symerror: 0.02} - - {label: sys, symerror: 0.008} - - {label: 'sys,beam', symerror: 0.004} - - {label: 'sys,lumi', symerror: 0.006} - value: 0.321 - - errors: - - {label: stat, symerror: 0.013} - - {label: sys, symerror: 0.006} - - {label: 'sys,beam', symerror: 0.001} - - {label: 'sys,lumi', symerror: 0.002} - value: 0.115 - - {value: '-'} -- header: {name: '$f_{FSR}$'} - qualifiers: - - {name: ETARAP(MU), value: 2.0-4.5} - - {name: M(MU+ MU-), units: GeV, value: 60.0-120.0} - - {name: PT(MU), units: GEV, value: '> 20'} - - {name: RE, value: P P --> Z0 < MU+ MU- > X} - - {name: SQRT(S), units: GeV, value: '7000.0'} - values: - - errors: - - {symerror: 0.02} - value: 1.05 - - errors: - - {symerror: 0.008} - value: 1.032 - - errors: - - {symerror: 0.006} - value: 1.027 - - errors: - - {symerror: 0.004} - value: 1.026 - - errors: - - {symerror: 0.004} - value: 1.025 - - errors: - - {symerror: 0.004} - value: 1.026 - - errors: - - {symerror: 0.003} - value: 1.026 - - errors: - - {symerror: 0.003} - value: 1.025 - - errors: - - {symerror: 0.003} - value: 1.026 - - errors: - - {symerror: 0.004} - value: 1.028 - - errors: - - {symerror: 0.004} - value: 1.026 - - errors: - - {symerror: 0.005} - value: 1.027 - - errors: - - {symerror: 0.005} - value: 1.026 - - errors: - - {symerror: 0.007} - value: 1.021 - - errors: - - {symerror: 0.01} - value: 1.025 - - errors: - - {symerror: 0.015} - value: 1.011 - - errors: - - {symerror: 0.033} - value: 1.018 - - {value: '-'} -independent_variables: -- header: {name: '$y_{Z}$'} - values: - - {high: 2.125, low: 2.0} - - {high: 2.25, low: 2.125} - - {high: 2.375, low: 2.25} - - {high: 2.5, low: 2.375} - - {high: 2.625, low: 2.5} - - 
{high: 2.75, low: 2.625} - - {high: 2.875, low: 2.75} - - {high: 3.0, low: 2.875} - - {high: 3.125, low: 3.0} - - {high: 3.25, low: 3.125} - - {high: 3.375, low: 3.25} - - {high: 3.5, low: 3.375} - - {high: 3.625, low: 3.5} - - {high: 3.75, low: 3.625} - - {high: 3.875, low: 3.75} - - {high: 4.0, low: 3.875} - - {high: 4.25, low: 4.0} - - {high: 4.5, low: 4.25} diff --git a/nnpdf_data/nnpdf_data/commondata/LHCB_WPWM_7TEV_MUON/rawdata/HEPData-ins1373300-v1-Table_4.yaml b/nnpdf_data/nnpdf_data/commondata/LHCB_WPWM_7TEV_MUON/rawdata/HEPData-ins1373300-v1-Table_4.yaml deleted file mode 100644 index bd41dc164c..0000000000 --- a/nnpdf_data/nnpdf_data/commondata/LHCB_WPWM_7TEV_MUON/rawdata/HEPData-ins1373300-v1-Table_4.yaml +++ /dev/null @@ -1,184 +0,0 @@ -dependent_variables: -- header: {name: '$\sigma_{W^{+} \to \mu^{+}\nu}$', units: PB} - qualifiers: - - {name: ETARAP(MU), value: 2.0-4.5} - - {name: PT(MU), units: GEV, value: '> 20'} - - {name: RE, value: P P --> W+ < MU+ NU > X} - - {name: SQRT(S), units: GeV, value: '7000.0'} - values: - - errors: - - {label: stat, symerror: 1.2} - - {label: sys, symerror: 3.5} - - {label: 'sys,beam', symerror: 2.0} - - {label: 'sys,lumi', symerror: 3.3} - value: 192.2 - - errors: - - {label: stat, symerror: 0.9} - - {label: sys, symerror: 3.1} - - {label: 'sys,beam', symerror: 1.9} - - {label: 'sys,lumi', symerror: 3.1} - value: 178.8 - - errors: - - {label: stat, symerror: 0.8} - - {label: sys, symerror: 2.1} - - {label: 'sys,beam', symerror: 1.6} - - {label: 'sys,lumi', symerror: 2.6} - value: 154.3 - - errors: - - {label: stat, symerror: 0.7} - - {label: sys, symerror: 1.6} - - {label: 'sys,beam', symerror: 1.3} - - {label: 'sys,lumi', symerror: 2.1} - value: 122.8 - - errors: - - {label: stat, symerror: 0.6} - - {label: sys, symerror: 1.3} - - {label: 'sys,beam', symerror: 1.0} - - {label: 'sys,lumi', symerror: 1.6} - value: 94.3 - - errors: - - {label: stat, symerror: 0.5} - - {label: sys, symerror: 0.9} - - {label: 'sys,beam', 
symerror: 0.7} - - {label: 'sys,lumi', symerror: 1.1} - value: 61.6 - - errors: - - {label: stat, symerror: 0.5} - - {label: sys, symerror: 0.7} - - {label: 'sys,beam', symerror: 0.6} - - {label: 'sys,lumi', symerror: 1.0} - value: 60.0 - - errors: - - {label: stat, symerror: 0.4} - - {label: sys, symerror: 0.4} - - {label: 'sys,beam', symerror: 0.2} - - {label: 'sys,lumi', symerror: 0.2} - value: 14.3 -- header: {name: '$f^{+}_{FSR}$'} - qualifiers: - - {name: ETARAP(MU), value: 2.0-4.5} - - {name: PT(MU), units: GEV, value: '> 20'} - - {name: RE, value: ''} - - {name: SQRT(S), units: GeV, value: '7000.0'} - values: - - errors: - - {symerror: 0.004} - value: 1.016 - - errors: - - {symerror: 0.004} - value: 1.018 - - errors: - - {symerror: 0.005} - value: 1.025 - - errors: - - {symerror: 0.004} - value: 1.015 - - errors: - - {symerror: 0.005} - value: 1.021 - - errors: - - {symerror: 0.005} - value: 1.015 - - errors: - - {symerror: 0.005} - value: 1.024 - - errors: - - {symerror: 0.005} - value: 1.021 -- header: {name: '$\sigma_{W^{-} \to \mu^{-}\overline{\nu}}$', units: PB} - qualifiers: - - {name: ETARAP(MU), value: 2.0-4.5} - - {name: PT(MU), units: GEV, value: '> 20'} - - {name: RE, value: P P --> W- < MU- NUBAR > X} - - {name: SQRT(S), units: GeV, value: '7000.0'} - values: - - errors: - - {label: stat, symerror: 0.9} - - {label: sys, symerror: 2.1} - - {label: 'sys,beam', symerror: 1.0} - - {label: 'sys,lumi', symerror: 1.9} - value: 111.1 - - errors: - - {label: stat, symerror: 0.7} - - {label: sys, symerror: 1.9} - - {label: 'sys,beam', symerror: 1.0} - - {label: 'sys,lumi', symerror: 1.8} - value: 104.9 - - errors: - - {label: stat, symerror: 0.7} - - {label: sys, symerror: 1.3} - - {label: 'sys,beam', symerror: 0.9} - - {label: 'sys,lumi', symerror: 1.6} - value: 96.1 - - errors: - - {label: stat, symerror: 0.7} - - {label: sys, symerror: 1.5} - - {label: 'sys,beam', symerror: 0.8} - - {label: 'sys,lumi', symerror: 1.5} - value: 88.4 - - errors: - - 
{label: stat, symerror: 0.6} - - {label: sys, symerror: 1.4} - - {label: 'sys,beam', symerror: 0.7} - - {label: 'sys,lumi', symerror: 1.4} - value: 80.6 - - errors: - - {label: stat, symerror: 0.6} - - {label: sys, symerror: 1.5} - - {label: 'sys,beam', symerror: 0.6} - - {label: 'sys,lumi', symerror: 1.2} - value: 68.6 - - errors: - - {label: stat, symerror: 0.7} - - {label: sys, symerror: 1.2} - - {label: 'sys,beam', symerror: 0.9} - - {label: 'sys,lumi', symerror: 1.6} - value: 95.9 - - errors: - - {label: stat, symerror: 0.8} - - {label: sys, symerror: 1.2} - - {label: 'sys,beam', symerror: 0.4} - - {label: 'sys,lumi', symerror: 0.7} - value: 43.8 -- header: {name: '$f^{-}_{FSR}$'} - qualifiers: - - {name: ETARAP(MU), value: 2.0-4.5} - - {name: PT(MU), units: GEV, value: '> 20'} - - {name: RE, value: ''} - - {name: SQRT(S), units: GeV, value: '7000.0'} - values: - - errors: - - {symerror: 0.003} - value: 1.019 - - errors: - - {symerror: 0.003} - value: 1.015 - - errors: - - {symerror: 0.003} - value: 1.01 - - errors: - - {symerror: 0.002} - value: 1.007 - - errors: - - {symerror: 0.003} - value: 1.009 - - errors: - - {symerror: 0.005} - value: 1.017 - - errors: - - {symerror: 0.005} - value: 1.012 - - errors: - - {symerror: 0.0} - value: 1.0 -independent_variables: -- header: {name: $\eta$} - values: - - {high: 2.25, low: 2.0} - - {high: 2.5, low: 2.25} - - {high: 2.75, low: 2.5} - - {high: 3.0, low: 2.75} - - {high: 3.25, low: 3.0} - - {high: 3.5, low: 3.25} - - {high: 4.0, low: 3.5} - - {high: 4.5, low: 4.0} diff --git a/nnpdf_data/nnpdf_data/commondata/LHCB_WPWM_8TEV_MUON/rawdata/corrmat.corr b/nnpdf_data/nnpdf_data/commondata/LHCB_WPWM_8TEV_MUON/HEPData-ins1406555-v1-yaml/corrmat.corr similarity index 100% rename from nnpdf_data/nnpdf_data/commondata/LHCB_WPWM_8TEV_MUON/rawdata/corrmat.corr rename to nnpdf_data/nnpdf_data/commondata/LHCB_WPWM_8TEV_MUON/HEPData-ins1406555-v1-yaml/corrmat.corr diff --git 
a/nnpdf_data/nnpdf_data/commondata/LHCB_WPWM_8TEV_MUON/filter.py b/nnpdf_data/nnpdf_data/commondata/LHCB_WPWM_8TEV_MUON/filter.py index 771a442d4b..0d019d3cf0 100644 --- a/nnpdf_data/nnpdf_data/commondata/LHCB_WPWM_8TEV_MUON/filter.py +++ b/nnpdf_data/nnpdf_data/commondata/LHCB_WPWM_8TEV_MUON/filter.py @@ -29,6 +29,8 @@ def load_yaml(table_id: int, version: int = 1) -> dict: ---------- table_id: int table ID number + version: int + HepData version Returns ------- @@ -36,8 +38,8 @@ def load_yaml(table_id: int, version: int = 1) -> dict: ditionary containing the table contents """ - filename = f"HEPData-ins1406555-v{version}-Table_{table_id}" - table = pathlib.Path(f"./rawdata/{filename}.yaml") + foldername = f"HEPData-ins1406555-v{version}-yaml" + table = pathlib.Path(f"./{foldername}/Table{table_id}.yaml") return yaml.safe_load(table.read_text()) @@ -147,13 +149,15 @@ def get_errors(hepdata: dict, bin_index: list, indx: int = 0) -> dict: return {"stat": stat, "sys_corr": sys_corr, "sys_beam": sys_beam, "sys_lumi": sys_lumi} -def read_corrmatrix(nb_datapoints: int) -> np.ndarray: +def read_corrmatrix(nb_datapoints: int, version: int = 1) -> np.ndarray: """Read the matrix and returns a symmetrized verions. 
Parameters ---------- nb_datapoints: int total number of datapoints + version: int + HepData version Returns ------- @@ -162,7 +166,7 @@ def read_corrmatrix(nb_datapoints: int) -> np.ndarray: """ corrmat = pd.read_csv( - "./rawdata/corrmat.corr", + f"./HEPData-ins1406555-v{version}-yaml/corrmat.corr", names=[f'{i}' for i in range(nb_datapoints)], delim_whitespace=True, ) @@ -366,7 +370,7 @@ def main_filter(boson: str = "Z") -> None: errors_combined = concatenate_dicts(combined_errors) # Compute the Artifical Systematics from CovMat - corrmat = read_corrmatrix(nb_datapoints=nbpoints) + corrmat = read_corrmatrix(nb_datapoints=nbpoints, version=version) covmat = multiply_syst(corrmat, errors_combined["sys_corr"]) artunc = generate_artificial_unc(ndata=nbpoints, covmat_list=covmat.tolist(), no_of_norm_mat=0) errors = format_uncertainties(errors_combined, artunc, bslice) diff --git a/nnpdf_data/nnpdf_data/commondata/LHCB_WPWM_8TEV_MUON/rawdata/HEPData-ins1406555-v1-Table_1.yaml b/nnpdf_data/nnpdf_data/commondata/LHCB_WPWM_8TEV_MUON/rawdata/HEPData-ins1406555-v1-Table_1.yaml deleted file mode 100644 index 0332da55b0..0000000000 --- a/nnpdf_data/nnpdf_data/commondata/LHCB_WPWM_8TEV_MUON/rawdata/HEPData-ins1406555-v1-Table_1.yaml +++ /dev/null @@ -1,184 +0,0 @@ -dependent_variables: -- header: {name: '$\sigma_{W^{+} \to \mu^{+}\nu}$', units: PB} - qualifiers: - - {name: ETARAP(MU), value: 2.0-4.5} - - {name: PT(MU), units: GEV, value: '> 20'} - - {name: RE, value: P P --> W+ < MU+ NU > X} - - {name: SQRT(S), units: GeV, value: '8000.0'} - values: - - errors: - - {label: stat, symerror: 1.2} - - {label: sys, symerror: 3.2} - - {label: 'sys,beam', symerror: 2.4} - - {label: 'sys,lumi', symerror: 2.7} - value: 236.5 - - errors: - - {label: stat, symerror: 0.9} - - {label: sys, symerror: 2.2} - - {label: 'sys,beam', symerror: 2.1} - - {label: 'sys,lumi', symerror: 2.4} - value: 208.4 - - errors: - - {label: stat, symerror: 0.8} - - {label: sys, symerror: 1.8} - - {label: 
'sys,beam', symerror: 1.8} - - {label: 'sys,lumi', symerror: 2.1} - value: 182.0 - - errors: - - {label: stat, symerror: 0.7} - - {label: sys, symerror: 1.6} - - {label: 'sys,beam', symerror: 1.5} - - {label: 'sys,lumi', symerror: 1.8} - value: 153.3 - - errors: - - {label: stat, symerror: 0.6} - - {label: sys, symerror: 1.3} - - {label: 'sys,beam', symerror: 1.2} - - {label: 'sys,lumi', symerror: 1.4} - value: 119.5 - - errors: - - {label: stat, symerror: 0.5} - - {label: sys, symerror: 1.0} - - {label: 'sys,beam', symerror: 0.8} - - {label: 'sys,lumi', symerror: 1.0} - value: 84.4 - - errors: - - {label: stat, symerror: 0.5} - - {label: sys, symerror: 1.2} - - {label: 'sys,beam', symerror: 0.9} - - {label: 'sys,lumi', symerror: 1.0} - value: 86.4 - - errors: - - {label: stat, symerror: 0.4} - - {label: sys, symerror: 0.7} - - {label: 'sys,beam', symerror: 0.2} - - {label: 'sys,lumi', symerror: 0.3} - value: 23.0 -- header: {name: '$f^{W^+}_{FSR}$'} - qualifiers: - - {name: ETARAP(MU), value: 2.0-4.5} - - {name: PT(MU), units: GEV, value: '> 20'} - - {name: RE, value: ''} - - {name: SQRT(S), units: GeV, value: '8000.0'} - values: - - errors: - - {symerror: 0.0047} - value: 1.0188 - - errors: - - {symerror: 0.0028} - value: 1.0163 - - errors: - - {symerror: 0.0025} - value: 1.0158 - - errors: - - {symerror: 0.0028} - value: 1.0148 - - errors: - - {symerror: 0.0032} - value: 1.0152 - - errors: - - {symerror: 0.0046} - value: 1.015 - - errors: - - {symerror: 0.0045} - value: 1.0175 - - errors: - - {symerror: 0.0087} - value: 1.0211 -- header: {name: '$\sigma_{W^{-} \to \mu^{-}\overline{\nu}}$', units: PB} - qualifiers: - - {name: ETARAP(MU), value: 2.0-4.5} - - {name: PT(MU), units: GEV, value: '> 20'} - - {name: RE, value: P P --> W- < MU- NUBAR > X} - - {name: SQRT(S), units: GeV, value: '8000.0'} - values: - - errors: - - {label: stat, symerror: 0.9} - - {label: sys, symerror: 1.8} - - {label: 'sys,beam', symerror: 1.2} - - {label: 'sys,lumi', symerror: 1.6} - 
value: 134.0 - - errors: - - {label: stat, symerror: 0.7} - - {label: sys, symerror: 1.4} - - {label: 'sys,beam', symerror: 1.0} - - {label: 'sys,lumi', symerror: 1.4} - value: 119.8 - - errors: - - {label: stat, symerror: 0.6} - - {label: sys, symerror: 1.2} - - {label: 'sys,beam', symerror: 1.0} - - {label: 'sys,lumi', symerror: 1.3} - value: 110.6 - - errors: - - {label: stat, symerror: 0.6} - - {label: sys, symerror: 1.2} - - {label: 'sys,beam', symerror: 0.9} - - {label: 'sys,lumi', symerror: 1.2} - value: 102.4 - - errors: - - {label: stat, symerror: 0.6} - - {label: sys, symerror: 1.1} - - {label: 'sys,beam', symerror: 0.8} - - {label: 'sys,lumi', symerror: 1.1} - value: 92.5 - - errors: - - {label: stat, symerror: 0.5} - - {label: sys, symerror: 0.9} - - {label: 'sys,beam', symerror: 0.7} - - {label: 'sys,lumi', symerror: 0.9} - value: 79.9 - - errors: - - {label: stat, symerror: 0.6} - - {label: sys, symerror: 1.5} - - {label: 'sys,beam', symerror: 1.0} - - {label: 'sys,lumi', symerror: 1.4} - value: 119.3 - - errors: - - {label: stat, symerror: 0.7} - - {label: sys, symerror: 1.6} - - {label: 'sys,beam', symerror: 0.5} - - {label: 'sys,lumi', symerror: 0.7} - value: 60.0 -- header: {name: '$f^{W^-}_{FSR}$'} - qualifiers: - - {name: ETARAP(MU), value: 2.0-4.5} - - {name: PT(MU), units: GEV, value: '> 20'} - - {name: RE, value: ''} - - {name: SQRT(S), units: GeV, value: '8000.0'} - values: - - errors: - - {symerror: 0.0026} - value: 1.0172 - - errors: - - {symerror: 0.0027} - value: 1.0155 - - errors: - - {symerror: 0.0028} - value: 1.0153 - - errors: - - {symerror: 0.003} - value: 1.0162 - - errors: - - {symerror: 0.0031} - value: 1.016 - - errors: - - {symerror: 0.0033} - value: 1.0176 - - errors: - - {symerror: 0.0033} - value: 1.02 - - errors: - - {symerror: 0.0053} - value: 1.0243 -independent_variables: -- header: {name: '$\eta^{\mu}$'} - values: - - {high: 2.25, low: 2.0} - - {high: 2.5, low: 2.25} - - {high: 2.75, low: 2.5} - - {high: 3.0, low: 
2.75} - - {high: 3.25, low: 3.0} - - {high: 3.5, low: 3.25} - - {high: 4.0, low: 3.5} - - {high: 4.5, low: 4.0} diff --git a/nnpdf_data/nnpdf_data/commondata/LHCB_WPWM_8TEV_MUON/rawdata/HEPData-ins1406555-v1-Table_2.yaml b/nnpdf_data/nnpdf_data/commondata/LHCB_WPWM_8TEV_MUON/rawdata/HEPData-ins1406555-v1-Table_2.yaml deleted file mode 100644 index 9a39670c3b..0000000000 --- a/nnpdf_data/nnpdf_data/commondata/LHCB_WPWM_8TEV_MUON/rawdata/HEPData-ins1406555-v1-Table_2.yaml +++ /dev/null @@ -1,200 +0,0 @@ -dependent_variables: -- header: {name: '$\sigma_{Z \to \mu^{+}\mu^{-}}$', units: PB} - qualifiers: - - {name: ETARAP(MU), value: 2.0-4.5} - - {name: M(MU+ MU-), units: GeV, value: 60.0-120.0} - - {name: PT(MU), units: GEV, value: '> 20'} - - {name: RE, value: P P --> Z0 < MU+ MU- > X} - - {name: SQRT(S), units: GeV, value: '8000.0'} - values: - - errors: - - {label: stat, symerror: 0.033} - - {label: sys, symerror: 0.055} - - {label: 'sys,beam', symerror: 0.014} - - {label: 'sys,lumi', symerror: 0.014} - value: 1.223 - - errors: - - {label: stat, symerror: 0.051} - - {label: sys, symerror: 0.06} - - {label: 'sys,beam', symerror: 0.038} - - {label: 'sys,lumi', symerror: 0.038} - value: 3.263 - - errors: - - {label: stat, symerror: 0.062} - - {label: sys, symerror: 0.064} - - {label: 'sys,beam', symerror: 0.057} - - {label: 'sys,lumi', symerror: 0.058} - value: 4.983 - - errors: - - {label: stat, symerror: 0.07} - - {label: sys, symerror: 0.072} - - {label: 'sys,beam', symerror: 0.077} - - {label: 'sys,lumi', symerror: 0.078} - value: 6.719 - - errors: - - {label: stat, symerror: 0.076} - - {label: sys, symerror: 0.074} - - {label: 'sys,beam', symerror: 0.093} - - {label: 'sys,lumi', symerror: 0.094} - value: 8.051 - - errors: - - {label: stat, symerror: 0.079} - - {label: sys, symerror: 0.074} - - {label: 'sys,beam', symerror: 0.103} - - {label: 'sys,lumi', symerror: 0.105} - value: 8.967 - - errors: - - {label: stat, symerror: 0.081} - - {label: sys, symerror: 0.076} 
- - {label: 'sys,beam', symerror: 0.11} - - {label: 'sys,lumi', symerror: 0.112} - value: 9.561 - - errors: - - {label: stat, symerror: 0.082} - - {label: sys, symerror: 0.071} - - {label: 'sys,beam', symerror: 0.113} - - {label: 'sys,lumi', symerror: 0.115} - value: 9.822 - - errors: - - {label: stat, symerror: 0.081} - - {label: sys, symerror: 0.074} - - {label: 'sys,beam', symerror: 0.112} - - {label: 'sys,lumi', symerror: 0.114} - value: 9.721 - - errors: - - {label: stat, symerror: 0.078} - - {label: sys, symerror: 0.071} - - {label: 'sys,beam', symerror: 0.104} - - {label: 'sys,lumi', symerror: 0.105} - value: 9.03 - - errors: - - {label: stat, symerror: 0.072} - - {label: sys, symerror: 0.074} - - {label: 'sys,beam', symerror: 0.089} - - {label: 'sys,lumi', symerror: 0.09} - value: 7.748 - - errors: - - {label: stat, symerror: 0.063} - - {label: sys, symerror: 0.051} - - {label: 'sys,beam', symerror: 0.07} - - {label: 'sys,lumi', symerror: 0.071} - value: 6.059 - - errors: - - {label: stat, symerror: 0.054} - - {label: sys, symerror: 0.041} - - {label: 'sys,beam', symerror: 0.05} - - {label: 'sys,lumi', symerror: 0.051} - value: 4.385 - - errors: - - {label: stat, symerror: 0.042} - - {label: sys, symerror: 0.027} - - {label: 'sys,beam', symerror: 0.031} - - {label: 'sys,lumi', symerror: 0.032} - value: 2.724 - - errors: - - {label: stat, symerror: 0.032} - - {label: sys, symerror: 0.02} - - {label: 'sys,beam', symerror: 0.018} - - {label: 'sys,lumi', symerror: 0.019} - value: 1.584 - - errors: - - {label: stat, symerror: 0.022} - - {label: sys, symerror: 0.012} - - {label: 'sys,beam', symerror: 0.009} - - {label: 'sys,lumi', symerror: 0.009} - value: 0.749 - - errors: - - {label: stat, symerror: 0.016} - - {label: sys, symerror: 0.008} - - {label: 'sys,beam', symerror: 0.004} - - {label: 'sys,lumi', symerror: 0.004} - value: 0.383 - - errors: - - {label: stat, symerror: 0.003} - - {label: sys, symerror: 0.001} - - {label: 'sys,beam', symerror: 0.0} - - 
{label: 'sys,lumi', symerror: 0.0} - value: 0.011 -- header: {name: '$f^{Z}_{FSR}$'} - qualifiers: - - {name: ETARAP(MU), value: 2.0-4.5} - - {name: M(MU+ MU-), units: GeV, value: 60.0-120.0} - - {name: PT(MU), units: GEV, value: '> 20'} - - {name: RE, value: P P --> Z0 < MU+ MU- > X} - - {name: SQRT(S), units: GeV, value: '8000.0'} - values: - - errors: - - {symerror: 0.0395} - value: 1.0466 - - errors: - - {symerror: 0.0119} - value: 1.0305 - - errors: - - {symerror: 0.0069} - value: 1.0277 - - errors: - - {symerror: 0.0061} - value: 1.0252 - - errors: - - {symerror: 0.0048} - value: 1.0264 - - errors: - - {symerror: 0.0032} - value: 1.0257 - - errors: - - {symerror: 0.0038} - value: 1.0258 - - errors: - - {symerror: 0.0027} - value: 1.0252 - - errors: - - {symerror: 0.0035} - value: 1.0282 - - errors: - - {symerror: 0.003} - value: 1.0264 - - errors: - - {symerror: 0.0066} - value: 1.0261 - - errors: - - {symerror: 0.004} - value: 1.0248 - - errors: - - {symerror: 0.006} - value: 1.0258 - - errors: - - {symerror: 0.0053} - value: 1.0228 - - errors: - - {symerror: 0.0079} - value: 1.018 - - errors: - - {symerror: 0.01} - value: 1.0207 - - errors: - - {symerror: 0.014} - value: 1.0183 - - errors: - - {symerror: 0.0761} - value: 1.0177 -independent_variables: -- header: {name: '$y_{Z}$'} - values: - - {high: 2.125, low: 2.0} - - {high: 2.25, low: 2.125} - - {high: 2.375, low: 2.25} - - {high: 2.5, low: 2.375} - - {high: 2.625, low: 2.5} - - {high: 2.75, low: 2.625} - - {high: 2.875, low: 2.75} - - {high: 3.0, low: 2.875} - - {high: 3.125, low: 3.0} - - {high: 3.25, low: 3.125} - - {high: 3.375, low: 3.25} - - {high: 3.5, low: 3.375} - - {high: 3.625, low: 3.5} - - {high: 3.75, low: 3.625} - - {high: 3.875, low: 3.75} - - {high: 4.0, low: 3.875} - - {high: 4.25, low: 4.0} - - {high: 4.5, low: 4.25} diff --git a/nnpdf_data/nnpdf_data/commondata/LHCB_Z0_13TEV/rawdata/corrmata.corr 
b/nnpdf_data/nnpdf_data/commondata/LHCB_Z0_13TEV/HEPData-ins1477581-v1-yaml/corrmata.corr similarity index 100% rename from nnpdf_data/nnpdf_data/commondata/LHCB_Z0_13TEV/rawdata/corrmata.corr rename to nnpdf_data/nnpdf_data/commondata/LHCB_Z0_13TEV/HEPData-ins1477581-v1-yaml/corrmata.corr diff --git a/nnpdf_data/nnpdf_data/commondata/LHCB_Z0_13TEV/rawdata/corrmatb.corr b/nnpdf_data/nnpdf_data/commondata/LHCB_Z0_13TEV/HEPData-ins1477581-v1-yaml/corrmatb.corr similarity index 100% rename from nnpdf_data/nnpdf_data/commondata/LHCB_Z0_13TEV/rawdata/corrmatb.corr rename to nnpdf_data/nnpdf_data/commondata/LHCB_Z0_13TEV/HEPData-ins1477581-v1-yaml/corrmatb.corr diff --git a/nnpdf_data/nnpdf_data/commondata/LHCB_Z0_13TEV/filter.py b/nnpdf_data/nnpdf_data/commondata/LHCB_Z0_13TEV/filter.py index c999f9660c..7a0aa3e09e 100644 --- a/nnpdf_data/nnpdf_data/commondata/LHCB_Z0_13TEV/filter.py +++ b/nnpdf_data/nnpdf_data/commondata/LHCB_Z0_13TEV/filter.py @@ -16,7 +16,7 @@ MAP_METADATA = {"dielectron": 0, "dimuon": 1} -def load_yaml(table_id: int, state: str) -> dict: +def load_yaml(table_id: int, state: str, version: int = 1) -> dict: """Load the HEP data table in yaml format. 
Parameters @@ -25,6 +25,8 @@ def load_yaml(table_id: int, state: str) -> dict: table ID number state: str type of final state (Di-electron/Di-muon) + version: int + HepData version Returns ------- @@ -32,7 +34,8 @@ def load_yaml(table_id: int, state: str) -> dict: ditionary containing the table contents """ - filename = f"./rawdata/Table_{table_id}{MAP_STATE[state]}.yaml" + foldername = f"HEPData-ins1477581-v{version}-yaml" + filename = f"./{foldername}/Table_{table_id}{MAP_STATE[state]}.yaml" table = pathlib.Path(filename) return yaml.safe_load(table.read_text()) @@ -140,7 +143,7 @@ def get_errors(hepdata: dict, bin_index: list) -> dict: return {"stat": stat, "sys_corr": sys_corr, "sys_lumi": sys_lumi} -def read_corrmatrix(nb_datapoints: int, state: str) -> np.ndarray: +def read_corrmatrix(nb_datapoints: int, state: str, version: int = 1) -> np.ndarray: """Read the matrix and returns a symmetrized verions. Parameters @@ -149,6 +152,8 @@ def read_corrmatrix(nb_datapoints: int, state: str) -> np.ndarray: total number of datapoints state: str type of final state (Di-electron/Di-muon) + version: int + HepData version Returns ------- @@ -157,7 +162,7 @@ def read_corrmatrix(nb_datapoints: int, state: str) -> np.ndarray: """ corrmat = pd.read_csv( - f"./rawdata/corrmat{MAP_STATE[state]}.corr", + f"./HEPData-ins1477581-v{version}-yaml/corrmat{MAP_STATE[state]}.corr", names=[f'{i}' for i in range(nb_datapoints)], delim_whitespace=True, ) @@ -321,13 +326,13 @@ def main_filter(): """ for state in FINAL_STATE: - _, nbpoints, tables = read_metadata(final_state=state) + version, nbpoints, tables = read_metadata(final_state=state) bin_index = [i for i in range(nbpoints)] # Non-empty Bins comb_kins, comb_data = [], [] combined_errors = [] for tabid in tables: - yaml_content = load_yaml(table_id=tabid, state=state) + yaml_content = load_yaml(table_id=tabid, state=state, version=version) # The rapidity bin values for Di-electron are in Table2b kinematic_content = 
load_yaml(table_id=2, state=state) @@ -344,7 +349,7 @@ def main_filter(): errors_combined = concatenate_dicts(combined_errors) # Compute the Artifical Systematics from CovMat - corrmat = read_corrmatrix(nb_datapoints=nbpoints, state=state) + corrmat = read_corrmatrix(nb_datapoints=nbpoints, state=state, version=version) covmat = multiply_syst(corrmat, errors_combined["sys_corr"]) artunc = generate_artificial_unc( ndata=nbpoints, covmat_list=covmat.tolist(), no_of_norm_mat=0 diff --git a/nnpdf_data/nnpdf_data/commondata/LHCB_Z0_13TEV/rawdata/Table_2a.yaml b/nnpdf_data/nnpdf_data/commondata/LHCB_Z0_13TEV/rawdata/Table_2a.yaml deleted file mode 100644 index 1ef6ca8b11..0000000000 --- a/nnpdf_data/nnpdf_data/commondata/LHCB_Z0_13TEV/rawdata/Table_2a.yaml +++ /dev/null @@ -1,104 +0,0 @@ -dependent_variables: -- header: {name: '$f_\text{FSR}^{\mu\mu}$'} - qualifiers: - - {name: ETARAP(LEPTON), value: 2.0-4.5} - - {name: M(LEPTON+ LEPTON-), units: GeV, value: 60.0-120.0} - - {name: PT(LEPTON), units: GEV, value: '> 20'} - - {name: RE, value: P P --> Z0 < LEPTON+ LEPTON- > X} - - {name: SQRT(S), units: GeV, value: '13000.0'} - values: - - errors: - - {symerror: 0.005} - value: 1.016 - - errors: - - {symerror: 0.004} - value: 1.017 - - errors: - - {symerror: 0.002} - value: 1.021 - - errors: - - {symerror: 0.002} - value: 1.018 - - errors: - - {symerror: 0.003} - value: 1.023 - - errors: - - {symerror: 0.003} - value: 1.022 - - errors: - - {symerror: 0.002} - value: 1.022 - - errors: - - {symerror: 0.003} - value: 1.023 - - errors: - - {symerror: 0.002} - value: 1.026 - - errors: - - {symerror: 0.002} - value: 1.026 - - errors: - - {symerror: 0.004} - value: 1.025 - - errors: - - {symerror: 0.005} - value: 1.026 - - errors: - - {symerror: 0.002} - value: 1.027 - - errors: - - {symerror: 0.002} - value: 1.024 - - errors: - - {symerror: 0.003} - value: 1.021 - - errors: - - {symerror: 0.019} - value: 1.019 - - errors: - - {symerror: 0.014} - value: 1.034 - - errors: - - 
{symerror: 0.119} - value: 1.046 -independent_variables: -- header: {name: Bin index} - values: - - {value: 1} - - {value: 2} - - {value: 3} - - {value: 4} - - {value: 5} - - {value: 6} - - {value: 7} - - {value: 8} - - {value: 9} - - {value: 10} - - {value: 11} - - {value: 12} - - {value: 13} - - {value: 14} - - {value: 15} - - {value: 16} - - {value: 17} - - {value: 18} -- header: {name: Bin range} - values: - - {value: 2.000-2.125} - - {value: 2.125-2.250} - - {value: 2.250-2.375} - - {value: 2.375-2.500} - - {value: 2.500-2.625} - - {value: 2.625-2.750} - - {value: 2.750-2.875} - - {value: 2.875-3.000} - - {value: 3.000-3.125} - - {value: 3.125-3.250} - - {value: 3.250-3.375} - - {value: 3.375-3.500} - - {value: 3.500-3.625} - - {value: 3.625-3.750} - - {value: 3.750-3.875} - - {value: 3.875-4.000} - - {value: 4.000-4.250} - - {value: 4.250-4.500} diff --git a/nnpdf_data/nnpdf_data/commondata/LHCB_Z0_13TEV/rawdata/Table_2b.yaml b/nnpdf_data/nnpdf_data/commondata/LHCB_Z0_13TEV/rawdata/Table_2b.yaml deleted file mode 100644 index eb004deb88..0000000000 --- a/nnpdf_data/nnpdf_data/commondata/LHCB_Z0_13TEV/rawdata/Table_2b.yaml +++ /dev/null @@ -1,102 +0,0 @@ -dependent_variables: -- header: {name: '$f_\text{FSR}^{ e e}$'} - qualifiers: - - {name: ETARAP(LEPTON), value: 2.0-4.5} - - {name: M(LEPTON+ LEPTON-), units: GeV, value: 60.0-120.0} - - {name: PT(LEPTON), units: GEV, value: '> 20'} - - {name: RE, value: P P --> Z0 < LEPTON+ LEPTON- > X} - - {name: SQRT(S), units: GeV, value: '13000.0'} - values: - - errors: - - {symerror: 0.003} - value: 1.034 - - errors: - - {symerror: 0.005} - value: 1.037 - - errors: - - {symerror: 0.002} - value: 1.04 - - errors: - - {symerror: 0.002} - value: 1.041 - - errors: - - {symerror: 0.002} - value: 1.043 - - errors: - - {symerror: 0.004} - value: 1.044 - - errors: - - {symerror: 0.004} - value: 1.047 - - errors: - - {symerror: 0.002} - value: 1.048 - - errors: - - {symerror: 0.002} - value: 1.051 - - errors: - - {symerror: 
0.002} - value: 1.051 - - errors: - - {symerror: 0.001} - value: 1.055 - - errors: - - {symerror: 0.003} - value: 1.053 - - errors: - - {symerror: 0.005} - value: 1.049 - - errors: - - {symerror: 0.007} - value: 1.051 - - errors: - - {symerror: 0.004} - value: 1.045 - - errors: - - {symerror: 0.011} - value: 1.038 - - errors: - - {symerror: 0.013} - value: 1.061 - - {value: '-'} -independent_variables: -- header: {name: Bin index} - values: - - {value: 1} - - {value: 2} - - {value: 3} - - {value: 4} - - {value: 5} - - {value: 6} - - {value: 7} - - {value: 8} - - {value: 9} - - {value: 10} - - {value: 11} - - {value: 12} - - {value: 13} - - {value: 14} - - {value: 15} - - {value: 16} - - {value: 17} - - {value: 18} -- header: {name: Bin range} - values: - - {value: 2.000-2.125} - - {value: 2.125-2.250} - - {value: 2.250-2.375} - - {value: 2.375-2.500} - - {value: 2.500-2.625} - - {value: 2.625-2.750} - - {value: 2.750-2.875} - - {value: 2.875-3.000} - - {value: 3.000-3.125} - - {value: 3.125-3.250} - - {value: 3.250-3.375} - - {value: 3.375-3.500} - - {value: 3.500-3.625} - - {value: 3.625-3.750} - - {value: 3.750-3.875} - - {value: 3.875-4.000} - - {value: 4.000-4.250} - - {value: 4.250-4.500} diff --git a/nnpdf_data/nnpdf_data/commondata/LHCB_Z0_13TEV/rawdata/Table_5a.yaml b/nnpdf_data/nnpdf_data/commondata/LHCB_Z0_13TEV/rawdata/Table_5a.yaml deleted file mode 100644 index e2dee48e00..0000000000 --- a/nnpdf_data/nnpdf_data/commondata/LHCB_Z0_13TEV/rawdata/Table_5a.yaml +++ /dev/null @@ -1,120 +0,0 @@ -dependent_variables: -- header: {name: '{$\rm{d}\sigma_{ Z}^{\mu\mu}/\rm{d}y_ Z$', units: pb} - qualifiers: - - {name: ETARAP(LEPTON), value: 2.0-4.5} - - {name: M(LEPTON+ LEPTON-), units: GeV, value: 60.0-120.0} - - {name: PT(LEPTON), units: GEV, value: '> 20'} - - {name: RE, value: P P --> Z0 < LEPTON+ LEPTON- > X} - - {name: SQRT(S), units: GeV, value: '13000.0'} - values: - - errors: - - {label: stat, symerror: 0.7} - - {label: sys, symerror: 0.5} - - {label: 
'sys,lumi', symerror: 0.6} - value: 14.2 - - errors: - - {label: stat, symerror: 1.2} - - {label: sys, symerror: 1.2} - - {label: 'sys,lumi', symerror: 1.6} - value: 41.9 - - errors: - - {label: stat, symerror: 1.5} - - {label: sys, symerror: 1.8} - - {label: 'sys,lumi', symerror: 2.5} - value: 65.2 - - errors: - - {label: stat, symerror: 1.8} - - {label: sys, symerror: 2.3} - - {label: 'sys,lumi', symerror: 3.6} - value: 91.3 - - errors: - - {label: stat, symerror: 2.0} - - {label: sys, symerror: 2.7} - - {label: 'sys,lumi', symerror: 4.2} - value: 108.0 - - errors: - - {label: stat, symerror: 2.1} - - {label: sys, symerror: 3.0} - - {label: 'sys,lumi', symerror: 4.7} - value: 121.4 - - errors: - - {label: stat, symerror: 2.2} - - {label: sys, symerror: 3.3} - - {label: 'sys,lumi', symerror: 5.3} - value: 136.0 - - errors: - - {label: stat, symerror: 2.2} - - {label: sys, symerror: 3.4} - - {label: 'sys,lumi', symerror: 5.5} - value: 140.8 - - errors: - - {label: stat, symerror: 2.3} - - {label: sys, symerror: 3.5} - - {label: 'sys,lumi', symerror: 5.7} - value: 145.5 - - errors: - - {label: stat, symerror: 2.3} - - {label: sys, symerror: 3.4} - - {label: 'sys,lumi', symerror: 5.6} - value: 144.0 - - errors: - - {label: stat, symerror: 2.2} - - {label: sys, symerror: 3.3} - - {label: 'sys,lumi', symerror: 5.3} - value: 137.1 - - errors: - - {label: stat, symerror: 2.1} - - {label: sys, symerror: 3.0} - - {label: 'sys,lumi', symerror: 4.8} - value: 121.8 - - errors: - - {label: stat, symerror: 1.9} - - {label: sys, symerror: 2.4} - - {label: 'sys,lumi', symerror: 3.9} - value: 100.4 - - errors: - - {label: stat, symerror: 1.7} - - {label: sys, symerror: 1.8} - - {label: 'sys,lumi', symerror: 2.9} - value: 75.2 - - errors: - - {label: stat, symerror: 1.5} - - {label: sys, symerror: 1.5} - - {label: 'sys,lumi', symerror: 2.3} - value: 57.9 - - errors: - - {label: stat, symerror: 1.2} - - {label: sys, symerror: 1.3} - - {label: 'sys,lumi', symerror: 1.6} - value: 41.1 
- - errors: - - {label: stat, symerror: 0.6} - - {label: sys, symerror: 0.6} - - {label: 'sys,lumi', symerror: 0.7} - value: 18.4 - - errors: - - {label: stat, symerror: 0.2} - - {label: sys, symerror: 0.3} - - {label: 'sys,lumi', symerror: 0.1} - value: 2.6 -independent_variables: -- header: {name: Bin index} - values: - - {value: 1} - - {value: 2} - - {value: 3} - - {value: 4} - - {value: 5} - - {value: 6} - - {value: 7} - - {value: 8} - - {value: 9} - - {value: 10} - - {value: 11} - - {value: 12} - - {value: 13} - - {value: 14} - - {value: 15} - - {value: 16} - - {value: 17} - - {value: 18} diff --git a/nnpdf_data/nnpdf_data/commondata/LHCB_Z0_13TEV/rawdata/Table_5b.yaml b/nnpdf_data/nnpdf_data/commondata/LHCB_Z0_13TEV/rawdata/Table_5b.yaml deleted file mode 100644 index 7d96499631..0000000000 --- a/nnpdf_data/nnpdf_data/commondata/LHCB_Z0_13TEV/rawdata/Table_5b.yaml +++ /dev/null @@ -1,116 +0,0 @@ -dependent_variables: -- header: {name: '{$\rm{d}\sigma_{ Z}^{\rm{ee}}/\rm{d}y_ Z$', units: pb} - qualifiers: - - {name: ETARAP(LEPTON), value: 2.0-4.5} - - {name: M(LEPTON+ LEPTON-), units: GeV, value: 60.0-120.0} - - {name: PT(LEPTON), units: GEV, value: '> 20'} - - {name: RE, value: P P --> Z0 < LEPTON+ LEPTON- > X} - - {name: SQRT(S), units: GeV, value: '13000.0'} - values: - - errors: - - {label: stat, symerror: 1.3} - - {label: sys, symerror: 0.7} - - {label: 'sys,lumi', symerror: 0.5} - value: 11.8 - - errors: - - {label: stat, symerror: 2.2} - - {label: sys, symerror: 1.6} - - {label: 'sys,lumi', symerror: 1.6} - value: 42.1 - - errors: - - {label: stat, symerror: 2.5} - - {label: sys, symerror: 2.1} - - {label: 'sys,lumi', symerror: 2.6} - value: 66.1 - - errors: - - {label: stat, symerror: 2.9} - - {label: sys, symerror: 2.6} - - {label: 'sys,lumi', symerror: 3.4} - value: 87.9 - - errors: - - {label: stat, symerror: 3.0} - - {label: sys, symerror: 2.8} - - {label: 'sys,lumi', symerror: 3.7} - value: 95.8 - - errors: - - {label: stat, symerror: 3.3} - - 
{label: sys, symerror: 3.4} - - {label: 'sys,lumi', symerror: 4.6} - value: 118.5 - - errors: - - {label: stat, symerror: 3.6} - - {label: sys, symerror: 3.7} - - {label: 'sys,lumi', symerror: 5.2} - value: 133.3 - - errors: - - {label: stat, symerror: 3.7} - - {label: sys, symerror: 3.9} - - {label: 'sys,lumi', symerror: 5.5} - value: 141.3 - - errors: - - {label: stat, symerror: 4.0} - - {label: sys, symerror: 4.2} - - {label: 'sys,lumi', symerror: 5.9} - value: 151.2 - - errors: - - {label: stat, symerror: 3.9} - - {label: sys, symerror: 3.7} - - {label: 'sys,lumi', symerror: 5.2} - value: 133.6 - - errors: - - {label: stat, symerror: 4.1} - - {label: sys, symerror: 3.7} - - {label: 'sys,lumi', symerror: 5.1} - value: 129.6 - - errors: - - {label: stat, symerror: 4.0} - - {label: sys, symerror: 3.4} - - {label: 'sys,lumi', symerror: 4.5} - value: 116.5 - - errors: - - {label: stat, symerror: 3.8} - - {label: sys, symerror: 2.9} - - {label: 'sys,lumi', symerror: 3.6} - value: 93.5 - - errors: - - {label: stat, symerror: 3.7} - - {label: sys, symerror: 2.2} - - {label: 'sys,lumi', symerror: 2.5} - value: 63.8 - - errors: - - {label: stat, symerror: 3.7} - - {label: sys, symerror: 2.4} - - {label: 'sys,lumi', symerror: 2.3} - value: 58.6 - - errors: - - {label: stat, symerror: 4.0} - - {label: sys, symerror: 1.9} - - {label: 'sys,lumi', symerror: 1.4} - value: 34.7 - - errors: - - {label: stat, symerror: 3.2} - - {label: sys, symerror: 1.6} - - {label: 'sys,lumi', symerror: 0.7} - value: 18.8 - - {value: '-'} -independent_variables: -- header: {name: Bin index} - values: - - {value: 1} - - {value: 2} - - {value: 3} - - {value: 4} - - {value: 5} - - {value: 6} - - {value: 7} - - {value: 8} - - {value: 9} - - {value: 10} - - {value: 11} - - {value: 12} - - {value: 13} - - {value: 14} - - {value: 15} - - {value: 16} - - {value: 17} - - {value: 18} diff --git a/nnpdf_data/nnpdf_data/commondata/LHCB_Z0_7TEV_DIELECTRON/rawdata/corrmat.corr 
b/nnpdf_data/nnpdf_data/commondata/LHCB_Z0_7TEV_DIELECTRON/HEPData-ins1208102-v1-yaml/corrmat.corr similarity index 100% rename from nnpdf_data/nnpdf_data/commondata/LHCB_Z0_7TEV_DIELECTRON/rawdata/corrmat.corr rename to nnpdf_data/nnpdf_data/commondata/LHCB_Z0_7TEV_DIELECTRON/HEPData-ins1208102-v1-yaml/corrmat.corr diff --git a/nnpdf_data/nnpdf_data/commondata/LHCB_Z0_7TEV_DIELECTRON/filter.py b/nnpdf_data/nnpdf_data/commondata/LHCB_Z0_7TEV_DIELECTRON/filter.py index ab9a0f7097..ecebd83b28 100644 --- a/nnpdf_data/nnpdf_data/commondata/LHCB_Z0_7TEV_DIELECTRON/filter.py +++ b/nnpdf_data/nnpdf_data/commondata/LHCB_Z0_7TEV_DIELECTRON/filter.py @@ -13,7 +13,7 @@ NORM_FACTOR = 1_000.0 # from pb -> fb -def load_yaml(table_id: int) -> dict: +def load_yaml(table_id: int, version: int = 1) -> dict: """Load the HEP data table in yaml format. Parameters @@ -27,7 +27,8 @@ def load_yaml(table_id: int) -> dict: ditionary containing the table contents """ - table = pathlib.Path(f"./rawdata/Table{table_id}.yaml") + foldername = f"HEPData-ins1208102-v{version}-yaml" + table = pathlib.Path(f"./{foldername}/Table{table_id}.yaml") return yaml.safe_load(table.read_text()) @@ -133,7 +134,7 @@ def get_errors(hepdata: dict, central: list) -> dict: return {"stat": stat, "sys_uncorr": sys_uncorr, "sys_corr": sys_corr, "sys_lumi": sys_lumi} -def read_corrmatrix(nb_datapoints: int) -> np.ndarray: +def read_corrmatrix(nb_datapoints: int, version: int = 1) -> np.ndarray: """Read the matrix and returns a symmetrized verions. Parameters @@ -148,7 +149,7 @@ def read_corrmatrix(nb_datapoints: int) -> np.ndarray: """ corrmat = pd.read_csv( - "./rawdata/corrmat.corr", + f"./HEPData-ins1208102-v{version}-yaml/corrmat.corr", names=[f'{i}' for i in range(nb_datapoints)], delim_whitespace=True, ) @@ -318,12 +319,12 @@ def main_filter(): 4. 
Luminosity Systematic uncertainties: MULT, LHCBLUMI10 """ - _, nbpoints, tables = read_metadata() + version, nbpoints, tables = read_metadata() comb_kins, comb_data = [], [] combined_errors = [] for tabid in tables: - yaml_content = load_yaml(table_id=tabid) + yaml_content = load_yaml(table_id=tabid, version=version) # Extract the kinematic, data, and uncertainties kinematics = get_kinematics(hepdata=yaml_content) @@ -337,7 +338,7 @@ def main_filter(): errors_combined = concatenate_dicts(combined_errors) # Compute the Artifical Systematics from CovMat - corrmat = read_corrmatrix(nb_datapoints=nbpoints) + corrmat = read_corrmatrix(nb_datapoints=nbpoints, version=version) covmat = multiply_syst(corrmat, errors_combined["sys_corr"]) artunc = generate_artificial_unc(ndata=nbpoints, covmat_list=covmat.tolist(), no_of_norm_mat=0) errors = format_uncertainties(errors_combined, artunc) diff --git a/nnpdf_data/nnpdf_data/commondata/LHCB_Z0_7TEV_DIELECTRON/rawdata/Table2.yaml b/nnpdf_data/nnpdf_data/commondata/LHCB_Z0_7TEV_DIELECTRON/rawdata/Table2.yaml deleted file mode 100644 index efe0e71574..0000000000 --- a/nnpdf_data/nnpdf_data/commondata/LHCB_Z0_7TEV_DIELECTRON/rawdata/Table2.yaml +++ /dev/null @@ -1,85 +0,0 @@ -dependent_variables: -- header: {name: D(SIG)/DYRAP, units: PB} - qualifiers: - - {name: LEPTON ETARAP, value: 2.0-4.5} - - {name: LEPTON PT, units: GEV, value: '> 20.0'} - - {name: LEPTONS MASS, units: GeV, value: 60.0-120.0} - - {name: QED FSR level, value: Born} - - {name: RE, value: P P --> Z0 X} - - {name: SQRT(S), units: GeV, value: '7000.0'} - values: - - errors: - - {label: stat, symerror: 0.7} - - {label: 'sys,uncorr', symerror: 0.4} - - {label: 'sys,corr', symerror: 0.3} - - {label: 'sys,FSR', symerror: 0.1} - - {label: 'sys,Luminosity uncertainty, not included in the plots', symerror: 3.5%} - value: 13.6 - - errors: - - {label: stat, symerror: 1.0} - - {label: 'sys,uncorr', symerror: 0.6} - - {label: 'sys,corr', symerror: 0.8} - - {label: 'sys,FSR', 
symerror: 0.2} - - {label: 'sys,Luminosity uncertainty, not included in the plots', symerror: 3.5%} - value: 39.4 - - errors: - - {label: stat, symerror: 1.2} - - {label: 'sys,uncorr', symerror: 0.7} - - {label: 'sys,corr', symerror: 1.3} - - {label: 'sys,FSR', symerror: 0.3} - - {label: 'sys,Luminosity uncertainty, not included in the plots', symerror: 3.5%} - value: 56.7 - - errors: - - {label: stat, symerror: 1.3} - - {label: 'sys,uncorr', symerror: 0.8} - - {label: 'sys,corr', symerror: 1.5} - - {label: 'sys,FSR', symerror: 0.3} - - {label: 'sys,Luminosity uncertainty, not included in the plots', symerror: 3.5%} - value: 63.2 - - errors: - - {label: stat, symerror: 1.4} - - {label: 'sys,uncorr', symerror: 0.8} - - {label: 'sys,corr', symerror: 1.6} - - {label: 'sys,FSR', symerror: 0.3} - - {label: 'sys,Luminosity uncertainty, not included in the plots', symerror: 3.5%} - value: 59.9 - - errors: - - {label: stat, symerror: 1.3} - - {label: 'sys,uncorr', symerror: 0.8} - - {label: 'sys,corr', symerror: 1.1} - - {label: 'sys,FSR', symerror: 0.2} - - {label: 'sys,Luminosity uncertainty, not included in the plots', symerror: 3.5%} - value: 43.8 - - errors: - - {label: stat, symerror: 1.0} - - {label: 'sys,uncorr', symerror: 0.7} - - {label: 'sys,corr', symerror: 0.6} - - {label: 'sys,FSR', symerror: 0.1} - - {label: 'sys,Luminosity uncertainty, not included in the plots', symerror: 3.5%} - value: 20.5 - - errors: - - {label: stat, symerror: 0.8} - - {label: 'sys,uncorr', symerror: 0.5} - - {label: 'sys,corr', symerror: 0.3} - - {label: 'sys,FSR', symerror: 0.1} - - {label: 'sys,Luminosity uncertainty, not included in the plots', symerror: 3.5%} - value: 5.9 - - errors: - - {label: stat, symerror: 0.44} - - {label: 'sys,uncorr', symerror: 0.3} - - {label: 'sys,corr', symerror: 0.04} - - {label: 'sys,FSR', symerror: 0.02} - - {label: 'sys,Luminosity uncertainty, not included in the plots', symerror: 3.5%} - value: 0.66 -independent_variables: -- header: {name: YRAP} - 
values: - - {high: 2.25, low: 2.0} - - {high: 2.5, low: 2.25} - - {high: 2.75, low: 2.5} - - {high: 3.0, low: 2.75} - - {high: 3.25, low: 3.0} - - {high: 3.5, low: 3.25} - - {high: 3.75, low: 3.5} - - {high: 4.0, low: 3.75} - - {high: 4.25, low: 4.0} diff --git a/nnpdf_data/nnpdf_data/commondata/LHCB_Z0_7TEV_MUON/rawdata/corrmat.corr b/nnpdf_data/nnpdf_data/commondata/LHCB_Z0_7TEV_MUON/HEPData-ins1373300-v1-yaml/corrmat.corr similarity index 100% rename from nnpdf_data/nnpdf_data/commondata/LHCB_Z0_7TEV_MUON/rawdata/corrmat.corr rename to nnpdf_data/nnpdf_data/commondata/LHCB_Z0_7TEV_MUON/HEPData-ins1373300-v1-yaml/corrmat.corr diff --git a/nnpdf_data/nnpdf_data/commondata/LHCB_Z0_7TEV_MUON/filter.py b/nnpdf_data/nnpdf_data/commondata/LHCB_Z0_7TEV_MUON/filter.py index c449c22b19..2a6b0a362a 100644 --- a/nnpdf_data/nnpdf_data/commondata/LHCB_Z0_7TEV_MUON/filter.py +++ b/nnpdf_data/nnpdf_data/commondata/LHCB_Z0_7TEV_MUON/filter.py @@ -36,8 +36,8 @@ def load_yaml(table_id: int, version: int = 1) -> dict: ditionary containing the table contents """ - filename = f"HEPData-ins1373300-v{version}-Table_{table_id}" - table = pathlib.Path(f"./rawdata/{filename}.yaml") + foldername = f"HEPData-ins1373300-v{version}-yaml" + table = pathlib.Path(f"./{foldername}/Table{table_id}.yaml") return yaml.safe_load(table.read_text()) @@ -147,7 +147,7 @@ def get_errors(hepdata: dict, bin_index: list, indx: int = 0) -> dict: return {"stat": stat, "sys_corr": sys_corr, "sys_beam": sys_beam, "sys_lumi": sys_lumi} -def read_corrmatrix(nb_datapoints: int) -> np.ndarray: +def read_corrmatrix(nb_datapoints: int, version: int = 1) -> np.ndarray: """Read the matrix and returns a symmetrized verions. 
Parameters @@ -162,7 +162,7 @@ def read_corrmatrix(nb_datapoints: int) -> np.ndarray: """ corrmat = pd.read_csv( - "./rawdata/corrmat.corr", + f"./HEPData-ins1373300-v{version}-yaml/corrmat.corr", names=[f'{i}' for i in range(nb_datapoints)], delim_whitespace=True, ) @@ -225,7 +225,7 @@ def concatenate_dicts(multidict: list[dict]) -> dict: dict: dictionary whose keys are combined """ - new_dict = {} + new_dict: dict = {} for key in multidict[0].keys(): new_dict[key] = [] for element in multidict: @@ -366,7 +366,7 @@ def main_filter(boson: str = "Z") -> None: errors_combined = concatenate_dicts(combined_errors) # Compute the Artifical Systematics from CovMat - corrmat = read_corrmatrix(nb_datapoints=nbpoints) + corrmat = read_corrmatrix(nb_datapoints=nbpoints, version=version) covmat = multiply_syst(corrmat, errors_combined["sys_corr"]) artunc = generate_artificial_unc(ndata=nbpoints, covmat_list=covmat.tolist(), no_of_norm_mat=0) errors = format_uncertainties(errors_combined, artunc, bslice) diff --git a/nnpdf_data/nnpdf_data/commondata/LHCB_Z0_7TEV_MUON/rawdata/HEPData-ins1373300-v1-Table_1.yaml b/nnpdf_data/nnpdf_data/commondata/LHCB_Z0_7TEV_MUON/rawdata/HEPData-ins1373300-v1-Table_1.yaml deleted file mode 100644 index dd3e29fb37..0000000000 --- a/nnpdf_data/nnpdf_data/commondata/LHCB_Z0_7TEV_MUON/rawdata/HEPData-ins1373300-v1-Table_1.yaml +++ /dev/null @@ -1,193 +0,0 @@ -dependent_variables: -- header: {name: '$\sigma_{Z \to \mu^{+}\mu^{-}}$', units: PB} - qualifiers: - - {name: ETARAP(MU), value: 2.0-4.5} - - {name: M(MU+ MU-), units: GeV, value: 60.0-120.0} - - {name: PT(MU), units: GEV, value: '> 20'} - - {name: RE, value: P P --> Z0 < MU+ MU- > X} - - {name: SQRT(S), units: GeV, value: '7000.0'} - values: - - errors: - - {label: stat, symerror: 0.039} - - {label: sys, symerror: 0.032} - - {label: 'sys,beam', symerror: 0.012} - - {label: 'sys,lumi', symerror: 0.017} - value: 0.969 - - errors: - - {label: stat, symerror: 0.063} - - {label: sys, symerror: 0.05} 
- - {label: 'sys,beam', symerror: 0.036} - - {label: 'sys,lumi', symerror: 0.049} - value: 2.84 - - errors: - - {label: stat, symerror: 0.077} - - {label: sys, symerror: 0.078} - - {label: 'sys,beam', symerror: 0.055} - - {label: 'sys,lumi', symerror: 0.076} - value: 4.428 - - errors: - - {label: stat, symerror: 0.088} - - {label: sys, symerror: 0.06} - - {label: 'sys,beam', symerror: 0.073} - - {label: 'sys,lumi', symerror: 0.1} - value: 5.823 - - errors: - - {label: stat, symerror: 0.095} - - {label: sys, symerror: 0.068} - - {label: 'sys,beam', symerror: 0.086} - - {label: 'sys,lumi', symerror: 0.118} - value: 6.877 - - errors: - - {label: stat, symerror: 0.1} - - {label: sys, symerror: 0.069} - - {label: 'sys,beam', symerror: 0.096} - - {label: 'sys,lumi', symerror: 0.132} - value: 7.669 - - errors: - - {label: stat, symerror: 0.104} - - {label: sys, symerror: 0.07} - - {label: 'sys,beam', symerror: 0.104} - - {label: 'sys,lumi', symerror: 0.143} - value: 8.306 - - errors: - - {label: stat, symerror: 0.103} - - {label: sys, symerror: 0.066} - - {label: 'sys,beam', symerror: 0.103} - - {label: 'sys,lumi', symerror: 0.142} - value: 8.241 - - errors: - - {label: stat, symerror: 0.099} - - {label: sys, symerror: 0.059} - - {label: 'sys,beam', symerror: 0.097} - - {label: 'sys,lumi', symerror: 0.134} - value: 7.783 - - errors: - - {label: stat, symerror: 0.096} - - {label: sys, symerror: 0.058} - - {label: 'sys,beam', symerror: 0.089} - - {label: 'sys,lumi', symerror: 0.122} - value: 7.094 - - errors: - - {label: stat, symerror: 0.087} - - {label: sys, symerror: 0.049} - - {label: 'sys,beam', symerror: 0.074} - - {label: 'sys,lumi', symerror: 0.101} - value: 5.894 - - errors: - - {label: stat, symerror: 0.073} - - {label: sys, symerror: 0.041} - - {label: 'sys,beam', symerror: 0.052} - - {label: 'sys,lumi', symerror: 0.072} - value: 4.16 - - errors: - - {label: stat, symerror: 0.061} - - {label: sys, symerror: 0.03} - - {label: 'sys,beam', symerror: 0.036} - - 
{label: 'sys,lumi', symerror: 0.05} - value: 2.896 - - errors: - - {label: stat, symerror: 0.047} - - {label: sys, symerror: 0.023} - - {label: 'sys,beam', symerror: 0.022} - - {label: 'sys,lumi', symerror: 0.03} - value: 1.741 - - errors: - - {label: stat, symerror: 0.032} - - {label: sys, symerror: 0.014} - - {label: 'sys,beam', symerror: 0.01} - - {label: 'sys,lumi', symerror: 0.014} - value: 0.825 - - errors: - - {label: stat, symerror: 0.02} - - {label: sys, symerror: 0.008} - - {label: 'sys,beam', symerror: 0.004} - - {label: 'sys,lumi', symerror: 0.006} - value: 0.321 - - errors: - - {label: stat, symerror: 0.013} - - {label: sys, symerror: 0.006} - - {label: 'sys,beam', symerror: 0.001} - - {label: 'sys,lumi', symerror: 0.002} - value: 0.115 - - {value: '-'} -- header: {name: '$f_{FSR}$'} - qualifiers: - - {name: ETARAP(MU), value: 2.0-4.5} - - {name: M(MU+ MU-), units: GeV, value: 60.0-120.0} - - {name: PT(MU), units: GEV, value: '> 20'} - - {name: RE, value: P P --> Z0 < MU+ MU- > X} - - {name: SQRT(S), units: GeV, value: '7000.0'} - values: - - errors: - - {symerror: 0.02} - value: 1.05 - - errors: - - {symerror: 0.008} - value: 1.032 - - errors: - - {symerror: 0.006} - value: 1.027 - - errors: - - {symerror: 0.004} - value: 1.026 - - errors: - - {symerror: 0.004} - value: 1.025 - - errors: - - {symerror: 0.004} - value: 1.026 - - errors: - - {symerror: 0.003} - value: 1.026 - - errors: - - {symerror: 0.003} - value: 1.025 - - errors: - - {symerror: 0.003} - value: 1.026 - - errors: - - {symerror: 0.004} - value: 1.028 - - errors: - - {symerror: 0.004} - value: 1.026 - - errors: - - {symerror: 0.005} - value: 1.027 - - errors: - - {symerror: 0.005} - value: 1.026 - - errors: - - {symerror: 0.007} - value: 1.021 - - errors: - - {symerror: 0.01} - value: 1.025 - - errors: - - {symerror: 0.015} - value: 1.011 - - errors: - - {symerror: 0.033} - value: 1.018 - - {value: '-'} -independent_variables: -- header: {name: '$y_{Z}$'} - values: - - {high: 2.125, 
low: 2.0} - - {high: 2.25, low: 2.125} - - {high: 2.375, low: 2.25} - - {high: 2.5, low: 2.375} - - {high: 2.625, low: 2.5} - - {high: 2.75, low: 2.625} - - {high: 2.875, low: 2.75} - - {high: 3.0, low: 2.875} - - {high: 3.125, low: 3.0} - - {high: 3.25, low: 3.125} - - {high: 3.375, low: 3.25} - - {high: 3.5, low: 3.375} - - {high: 3.625, low: 3.5} - - {high: 3.75, low: 3.625} - - {high: 3.875, low: 3.75} - - {high: 4.0, low: 3.875} - - {high: 4.25, low: 4.0} - - {high: 4.5, low: 4.25} diff --git a/nnpdf_data/nnpdf_data/commondata/LHCB_Z0_7TEV_MUON/rawdata/HEPData-ins1373300-v1-Table_4.yaml b/nnpdf_data/nnpdf_data/commondata/LHCB_Z0_7TEV_MUON/rawdata/HEPData-ins1373300-v1-Table_4.yaml deleted file mode 100644 index bd41dc164c..0000000000 --- a/nnpdf_data/nnpdf_data/commondata/LHCB_Z0_7TEV_MUON/rawdata/HEPData-ins1373300-v1-Table_4.yaml +++ /dev/null @@ -1,184 +0,0 @@ -dependent_variables: -- header: {name: '$\sigma_{W^{+} \to \mu^{+}\nu}$', units: PB} - qualifiers: - - {name: ETARAP(MU), value: 2.0-4.5} - - {name: PT(MU), units: GEV, value: '> 20'} - - {name: RE, value: P P --> W+ < MU+ NU > X} - - {name: SQRT(S), units: GeV, value: '7000.0'} - values: - - errors: - - {label: stat, symerror: 1.2} - - {label: sys, symerror: 3.5} - - {label: 'sys,beam', symerror: 2.0} - - {label: 'sys,lumi', symerror: 3.3} - value: 192.2 - - errors: - - {label: stat, symerror: 0.9} - - {label: sys, symerror: 3.1} - - {label: 'sys,beam', symerror: 1.9} - - {label: 'sys,lumi', symerror: 3.1} - value: 178.8 - - errors: - - {label: stat, symerror: 0.8} - - {label: sys, symerror: 2.1} - - {label: 'sys,beam', symerror: 1.6} - - {label: 'sys,lumi', symerror: 2.6} - value: 154.3 - - errors: - - {label: stat, symerror: 0.7} - - {label: sys, symerror: 1.6} - - {label: 'sys,beam', symerror: 1.3} - - {label: 'sys,lumi', symerror: 2.1} - value: 122.8 - - errors: - - {label: stat, symerror: 0.6} - - {label: sys, symerror: 1.3} - - {label: 'sys,beam', symerror: 1.0} - - {label: 'sys,lumi', 
symerror: 1.6} - value: 94.3 - - errors: - - {label: stat, symerror: 0.5} - - {label: sys, symerror: 0.9} - - {label: 'sys,beam', symerror: 0.7} - - {label: 'sys,lumi', symerror: 1.1} - value: 61.6 - - errors: - - {label: stat, symerror: 0.5} - - {label: sys, symerror: 0.7} - - {label: 'sys,beam', symerror: 0.6} - - {label: 'sys,lumi', symerror: 1.0} - value: 60.0 - - errors: - - {label: stat, symerror: 0.4} - - {label: sys, symerror: 0.4} - - {label: 'sys,beam', symerror: 0.2} - - {label: 'sys,lumi', symerror: 0.2} - value: 14.3 -- header: {name: '$f^{+}_{FSR}$'} - qualifiers: - - {name: ETARAP(MU), value: 2.0-4.5} - - {name: PT(MU), units: GEV, value: '> 20'} - - {name: RE, value: ''} - - {name: SQRT(S), units: GeV, value: '7000.0'} - values: - - errors: - - {symerror: 0.004} - value: 1.016 - - errors: - - {symerror: 0.004} - value: 1.018 - - errors: - - {symerror: 0.005} - value: 1.025 - - errors: - - {symerror: 0.004} - value: 1.015 - - errors: - - {symerror: 0.005} - value: 1.021 - - errors: - - {symerror: 0.005} - value: 1.015 - - errors: - - {symerror: 0.005} - value: 1.024 - - errors: - - {symerror: 0.005} - value: 1.021 -- header: {name: '$\sigma_{W^{-} \to \mu^{-}\overline{\nu}}$', units: PB} - qualifiers: - - {name: ETARAP(MU), value: 2.0-4.5} - - {name: PT(MU), units: GEV, value: '> 20'} - - {name: RE, value: P P --> W- < MU- NUBAR > X} - - {name: SQRT(S), units: GeV, value: '7000.0'} - values: - - errors: - - {label: stat, symerror: 0.9} - - {label: sys, symerror: 2.1} - - {label: 'sys,beam', symerror: 1.0} - - {label: 'sys,lumi', symerror: 1.9} - value: 111.1 - - errors: - - {label: stat, symerror: 0.7} - - {label: sys, symerror: 1.9} - - {label: 'sys,beam', symerror: 1.0} - - {label: 'sys,lumi', symerror: 1.8} - value: 104.9 - - errors: - - {label: stat, symerror: 0.7} - - {label: sys, symerror: 1.3} - - {label: 'sys,beam', symerror: 0.9} - - {label: 'sys,lumi', symerror: 1.6} - value: 96.1 - - errors: - - {label: stat, symerror: 0.7} - - {label: 
sys, symerror: 1.5} - - {label: 'sys,beam', symerror: 0.8} - - {label: 'sys,lumi', symerror: 1.5} - value: 88.4 - - errors: - - {label: stat, symerror: 0.6} - - {label: sys, symerror: 1.4} - - {label: 'sys,beam', symerror: 0.7} - - {label: 'sys,lumi', symerror: 1.4} - value: 80.6 - - errors: - - {label: stat, symerror: 0.6} - - {label: sys, symerror: 1.5} - - {label: 'sys,beam', symerror: 0.6} - - {label: 'sys,lumi', symerror: 1.2} - value: 68.6 - - errors: - - {label: stat, symerror: 0.7} - - {label: sys, symerror: 1.2} - - {label: 'sys,beam', symerror: 0.9} - - {label: 'sys,lumi', symerror: 1.6} - value: 95.9 - - errors: - - {label: stat, symerror: 0.8} - - {label: sys, symerror: 1.2} - - {label: 'sys,beam', symerror: 0.4} - - {label: 'sys,lumi', symerror: 0.7} - value: 43.8 -- header: {name: '$f^{-}_{FSR}$'} - qualifiers: - - {name: ETARAP(MU), value: 2.0-4.5} - - {name: PT(MU), units: GEV, value: '> 20'} - - {name: RE, value: ''} - - {name: SQRT(S), units: GeV, value: '7000.0'} - values: - - errors: - - {symerror: 0.003} - value: 1.019 - - errors: - - {symerror: 0.003} - value: 1.015 - - errors: - - {symerror: 0.003} - value: 1.01 - - errors: - - {symerror: 0.002} - value: 1.007 - - errors: - - {symerror: 0.003} - value: 1.009 - - errors: - - {symerror: 0.005} - value: 1.017 - - errors: - - {symerror: 0.005} - value: 1.012 - - errors: - - {symerror: 0.0} - value: 1.0 -independent_variables: -- header: {name: $\eta$} - values: - - {high: 2.25, low: 2.0} - - {high: 2.5, low: 2.25} - - {high: 2.75, low: 2.5} - - {high: 3.0, low: 2.75} - - {high: 3.25, low: 3.0} - - {high: 3.5, low: 3.25} - - {high: 4.0, low: 3.5} - - {high: 4.5, low: 4.0} diff --git a/nnpdf_data/nnpdf_data/commondata/LHCB_Z0_8TEV_DIELECTRON/rawdata/corrmat.corr b/nnpdf_data/nnpdf_data/commondata/LHCB_Z0_8TEV_DIELECTRON/HEPData-ins1347133-v1-yaml/corrmat.corr similarity index 100% rename from nnpdf_data/nnpdf_data/commondata/LHCB_Z0_8TEV_DIELECTRON/rawdata/corrmat.corr rename to 
nnpdf_data/nnpdf_data/commondata/LHCB_Z0_8TEV_DIELECTRON/HEPData-ins1347133-v1-yaml/corrmat.corr diff --git a/nnpdf_data/nnpdf_data/commondata/LHCB_Z0_8TEV_DIELECTRON/filter.py b/nnpdf_data/nnpdf_data/commondata/LHCB_Z0_8TEV_DIELECTRON/filter.py index c78c9ae2f7..7be97a2095 100644 --- a/nnpdf_data/nnpdf_data/commondata/LHCB_Z0_8TEV_DIELECTRON/filter.py +++ b/nnpdf_data/nnpdf_data/commondata/LHCB_Z0_8TEV_DIELECTRON/filter.py @@ -13,13 +13,15 @@ NORM_FACTOR = 1_000.0 # from pb -> fb -def load_yaml(table_id: int) -> dict: +def load_yaml(table_id: int, version: int = 1) -> dict: """Load the HEP data table in yaml format. Parameters ---------- table_id: int table ID number + version: int + HepData version Returns ------- @@ -27,7 +29,8 @@ def load_yaml(table_id: int) -> dict: ditionary containing the table contents """ - table = pathlib.Path(f"./rawdata/Table{table_id}.yaml") + foldername = f"HEPData-ins1347133-v{version}-yaml" + table = pathlib.Path(f"./{foldername}/Table{table_id}.yaml") return yaml.safe_load(table.read_text()) @@ -129,13 +132,15 @@ def get_errors(hepdata: dict) -> dict: return {"stat": stat, "sys_uncorr": sys_uncorr, "sys_corr": sys_corr, "sys_lumi": sys_lumi} -def read_corrmatrix(nb_datapoints: int) -> np.ndarray: +def read_corrmatrix(nb_datapoints: int, version: int = 1) -> np.ndarray: """Read the matrix and returns a symmetrized verions. Parameters ---------- nb_datapoints: int total number of datapoints + version: int + HepData version Returns ------- @@ -144,7 +149,7 @@ def read_corrmatrix(nb_datapoints: int) -> np.ndarray: """ corrmat = pd.read_csv( - "./rawdata/corrmat.corr", + f"./HEPData-ins1347133-v{version}-yaml/corrmat.corr", names=[f'{i}' for i in range(nb_datapoints)], delim_whitespace=True, ) @@ -314,12 +319,12 @@ def main_filter(): 4. 
Luminosity Systematic uncertainties: MULT, LHCBLUMI8TEV """ - _, nbpoints, tables = read_metadata() + version, nbpoints, tables = read_metadata() comb_kins, comb_data = [], [] combined_errors = [] for tabid in tables: - yaml_content = load_yaml(table_id=tabid) + yaml_content = load_yaml(table_id=tabid, version=version) # Extract the kinematic, data, and uncertainties kinematics = get_kinematics(hepdata=yaml_content) @@ -333,7 +338,7 @@ def main_filter(): errors_combined = concatenate_dicts(combined_errors) # Compute the Artifical Systematics from CovMat - corrmat = read_corrmatrix(nb_datapoints=nbpoints) + corrmat = read_corrmatrix(nb_datapoints=nbpoints, version=version) covmat = multiply_syst(corrmat, errors_combined["sys_corr"]) artunc = generate_artificial_unc(ndata=nbpoints, covmat_list=covmat.tolist(), no_of_norm_mat=0) errors = format_uncertainties(errors_combined, artunc) diff --git a/nnpdf_data/nnpdf_data/commondata/LHCB_Z0_8TEV_DIELECTRON/rawdata/Table2.yaml b/nnpdf_data/nnpdf_data/commondata/LHCB_Z0_8TEV_DIELECTRON/rawdata/Table2.yaml deleted file mode 100644 index 481c78855b..0000000000 --- a/nnpdf_data/nnpdf_data/commondata/LHCB_Z0_8TEV_DIELECTRON/rawdata/Table2.yaml +++ /dev/null @@ -1,190 +0,0 @@ -dependent_variables: -- header: {name: '${\rm d}\sigma / {\rm d} y_{Z}$', units: pb} - qualifiers: - - {name: ETARAP(E), value: 2.0-4.5} - - {name: M(E+ E-), units: GeV, value: 60.0-120.0} - - {name: PT(E), units: GEV, value: '> 20'} - - {name: RE, value: P P --> Z0 < E+ E- > X} - - {name: SQRT(S), units: GeV, value: '8000.0'} - values: - - errors: - - {label: stat, symerror: 0.37} - - {label: 'sys,uncorr', symerror: 0.21} - - {label: 'sys,corr', symerror: 0.14} - - {label: 'sys,lumi', symerror: 0.1} - value: 8.27 - - errors: - - {label: stat, symerror: 0.61} - - {label: 'sys,uncorr', symerror: 0.32} - - {label: 'sys,corr', symerror: 0.43} - - {label: 'sys,lumi', symerror: 0.32} - value: 26.17 - - errors: - - {label: stat, symerror: 0.72} - - {label: 
'sys,uncorr', symerror: 0.36} - - {label: 'sys,corr', symerror: 0.62} - - {label: 'sys,lumi', symerror: 0.49} - value: 40.29 - - errors: - - {label: stat, symerror: 0.8} - - {label: 'sys,uncorr', symerror: 0.39} - - {label: 'sys,corr', symerror: 0.81} - - {label: 'sys,lumi', symerror: 0.64} - value: 52.16 - - errors: - - {label: stat, symerror: 0.86} - - {label: 'sys,uncorr', symerror: 0.4} - - {label: 'sys,corr', symerror: 1.01} - - {label: 'sys,lumi', symerror: 0.77} - value: 61.92 - - errors: - - {label: stat, symerror: 0.93} - - {label: 'sys,uncorr', symerror: 0.45} - - {label: 'sys,corr', symerror: 1.1} - - {label: 'sys,lumi', symerror: 0.88} - value: 72.32 - - errors: - - {label: stat, symerror: 0.98} - - {label: 'sys,uncorr', symerror: 0.47} - - {label: 'sys,corr', symerror: 1.16} - - {label: 'sys,lumi', symerror: 0.93} - value: 76.29 - - errors: - - {label: stat, symerror: 0.99} - - {label: 'sys,uncorr', symerror: 0.48} - - {label: 'sys,corr', symerror: 1.18} - - {label: 'sys,lumi', symerror: 0.95} - value: 77.67 - - errors: - - {label: stat, symerror: 1.03} - - {label: 'sys,uncorr', symerror: 0.51} - - {label: 'sys,corr', symerror: 1.18} - - {label: 'sys,lumi', symerror: 0.95} - value: 77.72 - - errors: - - {label: stat, symerror: 1.02} - - {label: 'sys,uncorr', symerror: 0.5} - - {label: 'sys,corr', symerror: 1.06} - - {label: 'sys,lumi', symerror: 0.85} - value: 69.58 - - errors: - - {label: stat, symerror: 1.01} - - {label: 'sys,uncorr', symerror: 0.51} - - {label: 'sys,corr', symerror: 0.96} - - {label: 'sys,lumi', symerror: 0.76} - value: 62.03 - - errors: - - {label: stat, symerror: 0.92} - - {label: 'sys,uncorr', symerror: 0.46} - - {label: 'sys,corr', symerror: 0.71} - - {label: 'sys,lumi', symerror: 0.56} - value: 46.26 - - errors: - - {label: stat, symerror: 0.84} - - {label: 'sys,uncorr', symerror: 0.41} - - {label: 'sys,corr', symerror: 0.53} - - {label: 'sys,lumi', symerror: 0.41} - value: 33.49 - - errors: - - {label: stat, symerror: 0.74} - 
- {label: 'sys,uncorr', symerror: 0.37} - - {label: 'sys,corr', symerror: 0.36} - - {label: 'sys,lumi', symerror: 0.28} - value: 22.81 - - errors: - - {label: stat, symerror: 0.64} - - {label: 'sys,uncorr', symerror: 0.33} - - {label: 'sys,corr', symerror: 0.28} - - {label: 'sys,lumi', symerror: 0.17} - value: 13.56 - - errors: - - {label: stat, symerror: 0.57} - - {label: 'sys,uncorr', symerror: 0.28} - - {label: 'sys,corr', symerror: 0.13} - - {label: 'sys,lumi', symerror: 0.08} - value: 6.28 - - errors: - - {label: stat, symerror: 0.33} - - {label: 'sys,uncorr', symerror: 0.16} - - {label: 'sys,corr', symerror: 0.04} - - {label: 'sys,lumi', symerror: 0.02} - value: 1.85 -- header: {name: '$f_{\rm FSR}$'} - qualifiers: - - {name: ETARAP(E), value: 2.0-4.5} - - {name: M(E+ E-), units: GeV, value: 60.0-120.0} - - {name: PT(E), units: GEV, value: '> 20'} - - {name: RE, value: P P --> Z0 < E+ E- > X} - - {name: SQRT(S), units: GeV, value: '8000.0'} - values: - - errors: - - {symerror: 0.003} - value: 0.953 - - errors: - - {symerror: 0.002} - value: 0.955 - - errors: - - {symerror: 0.001} - value: 0.959 - - errors: - - {symerror: 0.001} - value: 0.96 - - errors: - - {symerror: 0.001} - value: 0.958 - - errors: - - {symerror: 0.001} - value: 0.958 - - errors: - - {symerror: 0.001} - value: 0.956 - - errors: - - {symerror: 0.001} - value: 0.952 - - errors: - - {symerror: 0.001} - value: 0.952 - - errors: - - {symerror: 0.001} - value: 0.949 - - errors: - - {symerror: 0.001} - value: 0.95 - - errors: - - {symerror: 0.001} - value: 0.949 - - errors: - - {symerror: 0.002} - value: 0.947 - - errors: - - {symerror: 0.002} - value: 0.951 - - errors: - - {symerror: 0.002} - value: 0.946 - - errors: - - {symerror: 0.004} - value: 0.939 - - errors: - - {symerror: 0.005} - value: 0.928 -independent_variables: -- header: {name: '$y_{Z}$'} - values: - - {high: 2.125, low: 2.0} - - {high: 2.25, low: 2.125} - - {high: 2.375, low: 2.25} - - {high: 2.5, low: 2.375} - - {high: 2.625, 
low: 2.5} - - {high: 2.75, low: 2.625} - - {high: 2.875, low: 2.75} - - {high: 3.0, low: 2.875} - - {high: 3.125, low: 3.0} - - {high: 3.25, low: 3.125} - - {high: 3.375, low: 3.25} - - {high: 3.5, low: 3.375} - - {high: 3.625, low: 3.5} - - {high: 3.75, low: 3.625} - - {high: 3.875, low: 3.75} - - {high: 4.0, low: 3.875} - - {high: 4.25, low: 4.0} diff --git a/nnpdf_data/nnpdf_data/commondata/LHCB_Z0_8TEV_MUON/HEPData-ins1406555-v1-yaml/corrmat.corr b/nnpdf_data/nnpdf_data/commondata/LHCB_Z0_8TEV_MUON/HEPData-ins1406555-v1-yaml/corrmat.corr new file mode 100644 index 0000000000..e611042482 --- /dev/null +++ b/nnpdf_data/nnpdf_data/commondata/LHCB_Z0_8TEV_MUON/HEPData-ins1406555-v1-yaml/corrmat.corr @@ -0,0 +1,34 @@ +1.0 +0.19 1.0 +0.17 0.27 1.0 +0.16 0.26 0.28 1.0 +0.16 0.25 0.28 0.29 1.0 +0.15 0.24 0.27 0.29 0.3 1.0 +0.14 0.23 0.26 0.28 0.29 0.3 1.0 +0.14 0.21 0.25 0.27 0.29 0.3 0.3 1.0 +0.13 0.2 0.23 0.25 0.27 0.28 0.29 0.29 1.0 +0.11 0.17 0.2 0.23 0.25 0.26 0.27 0.28 0.27 1.0 +0.09 0.14 0.16 0.18 0.2 0.22 0.22 0.23 0.23 0.23 1.0 +0.08 0.12 0.15 0.17 0.19 0.2 0.21 0.22 0.22 0.22 0.2 1.0 +0.07 0.1 0.12 0.14 0.16 0.17 0.18 0.19 0.19 0.2 0.18 0.19 1.0 +0.06 0.08 0.1 0.11 0.13 0.14 0.15 0.16 0.16 0.17 0.16 0.16 0.15 1.0 +0.05 0.07 0.08 0.09 0.1 0.11 0.11 0.12 0.13 0.13 0.12 0.13 0.12 0.11 1.0 +0.03 0.05 0.06 0.06 0.07 0.08 0.08 0.09 0.09 0.1 0.09 0.1 0.09 0.08 0.07 1.0 +0.03 0.04 0.04 0.05 0.05 0.06 0.06 0.06 0.07 0.07 0.07 0.08 0.07 0.07 0.06 0.05 1.0 +0.01 0.01 0.01 0.01 0.01 0.01 0.01 0.01 0.01 0.01 0.01 0.02 0.02 0.01 0.01 0.01 0.01 1.0 +0.23 0.3 0.28 0.27 0.26 0.25 0.24 0.23 0.21 0.18 0.15 0.13 0.11 0.1 0.07 0.05 0.04 0.01 1.0 +0.05 0.15 0.21 0.2 0.2 0.2 0.2 0.19 0.18 0.16 0.14 0.12 0.1 0.08 0.06 0.04 0.03 0.01 0.2 1.0 +0.04 0.07 0.12 0.15 0.16 0.17 0.17 0.17 0.16 0.15 0.13 0.13 0.11 0.08 0.06 0.05 0.03 0.01 0.13 0.12 1.0 +0.05 0.08 0.1 0.13 0.16 0.17 0.17 0.17 0.16 0.16 0.14 0.13 0.12 0.09 0.07 0.05 0.03 0.01 0.06 0.03 0.26 1.0 +0.06 0.08 0.1 0.11 0.14 0.16 
0.17 0.17 0.16 0.16 0.14 0.14 0.12 0.1 0.07 0.05 0.03 0.01 0.07 0.03 0.25 0.33 1.0 +0.04 0.06 0.07 0.09 0.1 0.11 0.12 0.13 0.13 0.12 0.11 0.11 0.09 0.08 0.06 0.04 0.03 0.0 0.03 -0.01 0.28 0.35 0.34 1.0 +0.04 0.06 0.07 0.08 0.09 0.1 0.11 0.12 0.12 0.12 0.11 0.11 0.1 0.09 0.07 0.05 0.04 0.0 0.0 -0.06 0.31 0.41 0.4 0.45 1.0 +0.02 0.03 0.04 0.04 0.04 0.04 0.05 0.05 0.06 0.06 0.06 0.07 0.06 0.06 0.05 0.04 0.04 0.01 -0.07 -0.14 0.14 0.32 0.32 0.35 0.40 1.0 +0.21 0.28 0.26 0.25 0.24 0.24 0.22 0.21 0.2 0.17 0.14 0.12 0.11 0.09 0.07 0.05 0.04 0.01 0.67 0.10 0.24 0.22 0.22 0.23 0.26 0.14 1.0 +0.04 0.14 0.19 0.18 0.18 0.18 0.18 0.17 0.16 0.15 0.12 0.11 0.09 0.07 0.05 0.04 0.03 0.0 0.07 0.54 0.23 0.28 0.28 0.28 0.33 0.24 0.21 1.0 +0.04 0.07 0.11 0.14 0.15 0.15 0.15 0.15 0.15 0.14 0.12 0.12 0.1 0.08 0.06 0.04 0.03 0.01 0.05 0.03 0.64 0.27 0.26 0.27 0.32 0.23 0.18 0.22 1.0 +0.05 0.07 0.09 0.12 0.14 0.15 0.15 0.16 0.15 0.14 0.12 0.12 0.11 0.08 0.06 0.04 0.03 0.01 0.04 0.00 0.25 0.70 0.30 0.33 0.39 0.29 0.21 0.25 0.31 1.0 +0.05 0.08 0.09 0.1 0.13 0.15 0.15 0.16 0.15 0.15 0.13 0.13 0.11 0.09 0.07 0.05 0.03 0.01 0.06 0.03 0.26 0.32 0.68 0.32 0.38 0.26 0.22 0.28 0.27 0.32 1.0 +0.04 0.06 0.08 0.09 0.1 0.12 0.13 0.13 0.13 0.13 0.11 0.11 0.1 0.08 0.06 0.04 0.03 0.0 0.07 0.04 0.30 0.28 0.27 0.63 0.36 0.22 0.23 0.23 0.29 0.32 0.28 1.0 +0.04 0.06 0.08 0.09 0.1 0.11 0.12 0.13 0.14 0.14 0.12 0.12 0.11 0.1 0.08 0.06 0.04 0.01 0.14 0.20 -0.13 -0.08 -0.07 -0.17 -0.04 -0.15 -0.06 -0.04 -0.04 -0.11 -0.07 -0.19 1.0 +0.03 0.04 0.04 0.05 0.05 0.06 0.06 0.06 0.07 0.07 0.07 0.08 0.08 0.07 0.06 0.05 0.04 0.01 0.12 0.17 -0.18 -0.17 -0.15 -0.23 -0.31 0.05 -0.09 -0.11 -0.14 -0.19 -0.18 -0.24 0.48 1.0 diff --git a/nnpdf_data/nnpdf_data/commondata/LHCB_Z0_8TEV_MUON/filter.py b/nnpdf_data/nnpdf_data/commondata/LHCB_Z0_8TEV_MUON/filter.py index 83eaaa249b..d4f164cc61 100644 --- a/nnpdf_data/nnpdf_data/commondata/LHCB_Z0_8TEV_MUON/filter.py +++ b/nnpdf_data/nnpdf_data/commondata/LHCB_Z0_8TEV_MUON/filter.py @@ 
-29,6 +29,8 @@ def load_yaml(table_id: int, version: int = 1) -> dict: ---------- table_id: int table ID number + version: int + HepData version Returns ------- @@ -147,13 +149,15 @@ def get_errors(hepdata: dict, bin_index: list, indx: int = 0) -> dict: return {"stat": stat, "sys_corr": sys_corr, "sys_beam": sys_beam, "sys_lumi": sys_lumi} -def read_corrmatrix(nb_datapoints: int) -> np.ndarray: +def read_corrmatrix(nb_datapoints: int, version: int = 1) -> np.ndarray: """Read the matrix and returns a symmetrized verions. Parameters ---------- nb_datapoints: int total number of datapoints + version: int + HepData version Returns ------- @@ -162,7 +166,7 @@ def read_corrmatrix(nb_datapoints: int) -> np.ndarray: """ corrmat = pd.read_csv( - "./HEPData-ins1406555-v1-yaml/corrmat.corr", + f"./HEPData-ins1406555-v{version}-yaml/corrmat.corr", names=[f'{i}' for i in range(nb_datapoints)], delim_whitespace=True, ) @@ -366,7 +370,7 @@ def main_filter(boson: str = "Z") -> None: errors_combined = concatenate_dicts(combined_errors) # Compute the Artifical Systematics from CovMat - corrmat = read_corrmatrix(nb_datapoints=nbpoints) + corrmat = read_corrmatrix(nb_datapoints=nbpoints, version=version) covmat = multiply_syst(corrmat, errors_combined["sys_corr"]) artunc = generate_artificial_unc(ndata=nbpoints, covmat_list=covmat.tolist(), no_of_norm_mat=0) errors = format_uncertainties(errors_combined, artunc, bslice) From 8b336493d2b34899aed4ed68ad18ab552dca8dfe Mon Sep 17 00:00:00 2001 From: Radonirinaunimi Date: Fri, 29 Nov 2024 22:47:48 +0100 Subject: [PATCH 6/7] Update workflow that checks the data generation --- .github/workflows/check_newcd.yml | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/.github/workflows/check_newcd.yml b/.github/workflows/check_newcd.yml index 5c33db7023..65abb0901a 100644 --- a/.github/workflows/check_newcd.yml +++ b/.github/workflows/check_newcd.yml @@ -22,13 +22,13 @@ jobs: here=$PWD readarray -d '' array < <(find 
./nnpdf_data/nnpdf_data/commondata -name "filter.py" -print0) for datname in "${array[@]}"; do dirpath=${datname%/*}; cd $dirpath; python filter.py || exit $?; cd $here; done - - name: Check for modified files 🛎️ - uses: tj-actions/verify-changed-files@v20 - id: verify-changed-files - - name: List all changed files and fail if any 🛎️ - if: steps.verify-changed-files.outputs.files_changed == 'true' - env: - CHANGED_FILES: ${{ steps.verify-changed-files.outputs.changed_files }} + - name: Check for modified commondata files 🛎️ + shell: bash -l {0} run: | - echo "Changed files: $CHANGED_FILES" - exit 1 + if [[ -n "$(git diff --name-only)" ]]; then + echo "Some of the commondata files have changed ❌" + git diff --name-only + exit 1 + else + echo "No modified commondata files ✅" + fi From 3e4a15f494d6398f6d2e811f6b62775950e9f369 Mon Sep 17 00:00:00 2001 From: Radonirinaunimi Date: Fri, 29 Nov 2024 22:49:12 +0100 Subject: [PATCH 7/7] Add part that downloads the raw data tables first --- .github/workflows/check_newcd.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/check_newcd.yml b/.github/workflows/check_newcd.yml index 65abb0901a..397fb83e25 100644 --- a/.github/workflows/check_newcd.yml +++ b/.github/workflows/check_newcd.yml @@ -19,6 +19,7 @@ jobs: - name: Run the filters 📦 shell: bash -l {0} run: | + download-hepdata here=$PWD readarray -d '' array < <(find ./nnpdf_data/nnpdf_data/commondata -name "filter.py" -print0) for datname in "${array[@]}"; do dirpath=${datname%/*}; cd $dirpath; python filter.py || exit $?; cd $here; done