Pull request #2 (Open): wants to merge 8 commits into base `master`
9 changes: 6 additions & 3 deletions .github/workflows/test.yml
@@ -5,11 +5,14 @@ jobs:
test:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v4
# Install dependencies
- name: Setup Miniconda
uses: conda-incubator/setup-miniconda@v2
uses: conda-incubator/setup-miniconda@v3
with:
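# Miniforge defaults to the conda-forge channel; mamba is a faster, conda-compatible solver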
miniforge-version: latest
use-mamba: true
channels: conda-forge
activate-environment: l1metml
environment-file: environment.yml
python-version: 3.9
@@ -29,4 +32,4 @@ jobs:
- name: Run autopep8
shell: bash -l {0}
run: |
autopep8 --in-place --aggressive --aggressive --recursive . --exit-code
autopep8 --in-place --aggressive --aggressive --recursive . --exit-code
104 changes: 94 additions & 10 deletions DataGenerator.py
@@ -7,13 +7,14 @@
from utils import convertXY2PtPhi, preProcessing, to_np_array
import h5py
import os
import itertools


class DataGenerator(tensorflow.keras.utils.Sequence):
'Generates data for Keras'

def __init__(self, list_files, batch_size=1024, n_dim=100,
max_entry=100000000):
def __init__(self, list_files, batch_size=1024, n_dim=100, maxNPF=100, compute_ef=0,
max_entry=100000000, edge_list=[]):
'Initialization'
self.n_features_pf = 6
self.n_features_pf_cat = 2
@@ -26,6 +27,9 @@ def __init__(self, list_files, batch_size=1024, n_dim=100,
self.file_mapping = []
self.max_entry = max_entry
self.open_files = [None]*len(list_files)
self.maxNPF = maxNPF
self.compute_ef = compute_ef
self.edge_list = edge_list
running_total = 0

self.h5files = []
@@ -57,7 +61,6 @@ def __getitem__(self, index):
# Generate indexes of the batch
indexes = self.indexes[index*self.batch_size:(index+1)*self.batch_size]
files = self.file_mapping[index*self.batch_size:(index+1)*self.batch_size]

unique_files = np.unique(files)
starts = np.array([min(indexes[files == i]) for i in unique_files])
stops = np.array([max(indexes[files == i]) for i in unique_files])
@@ -80,6 +83,32 @@ def on_epoch_end(self):
'Updates indexes after each epoch'
self.indexes = self.local_IDs

def deltaR_calc(self, eta1, phi1, eta2, phi2):
""" calculate deltaR """
dphi = (phi1-phi2)
gt_pi_idx = (dphi > np.pi)
lt_pi_idx = (dphi < -np.pi)
dphi[gt_pi_idx] -= 2*np.pi
dphi[lt_pi_idx] += 2*np.pi
deta = eta1-eta2
return np.hypot(deta, dphi)

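# kT of a pair: pT of the softer candidate scaled by its angular separation deltaR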
def kT_calc(self, pti, ptj, dR):
min_pt = np.minimum(pti, ptj)
kT = min_pt * dR
return kT

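# z of a pair: momentum-sharing fraction of the softer candidate, regularized by epsilon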
def z_calc(self, pti, ptj):
epsilon = 1.0e-12
min_pt = np.minimum(pti, ptj)
z = min_pt/(pti + ptj + epsilon)
return z

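# invariant mass squared of the pair, with four-vectors ordered as (E, px, py, pz)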
def mass2_calc(self, pi, pj):
pij = pi + pj
m2 = pij[:, :, 0]**2 - pij[:, :, 1]**2 - pij[:, :, 2]**2 - pij[:, :, 3]**2
return m2

def __data_generation(self, unique_files, starts, stops):
'Generates data containing batch_size samples'
# X : (n_samples, n_dim, n_channels)
@@ -102,15 +131,63 @@ def __data_generation(self, unique_files, starts, stops):
Y = self.y / (-self.normFac)
Xi, Xp, Xc1, Xc2 = preProcessing(self.X, self.normFac)

Xc = [Xc1, Xc2]
# dimension parameter for keras model
self.emb_input_dim = {i: int(np.max(Xc[i][0:1000])) + 1 for i in range(self.n_features_pf_cat)}
N = self.maxNPF
Nr = N*(N-1)

if self.compute_ef == 1:
eta = Xi[:, :, 1]
phi = Xi[:, :, 2]
pt = Xi[:, :, 0]
if ('m2' in self.edge_list):
px = Xp[:, :, 0]
py = Xp[:, :, 1]
pz = pt*np.sinh(eta)
energy = np.sqrt(px**2 + py**2 + pz**2)
p4 = np.stack((energy, px, py, pz), axis=-1)
receiver_sender_list = [i for i in itertools.product(range(N), range(N)) if i[0] != i[1]]
edge_idx = np.array(receiver_sender_list)
edge_stack = []
if ('dR' in self.edge_list) or ('kT' in self.edge_list):
eta1 = eta[:, edge_idx[:, 0]]
phi1 = phi[:, edge_idx[:, 0]]
eta2 = eta[:, edge_idx[:, 1]]
phi2 = phi[:, edge_idx[:, 1]]
dR = self.deltaR_calc(eta1, phi1, eta2, phi2)
edge_stack.append(dR)
if ('kT' in self.edge_list) or ('z' in self.edge_list):
pt1 = pt[:, edge_idx[:, 0]]
pt2 = pt[:, edge_idx[:, 1]]
if ('kT' in self.edge_list):
kT = self.kT_calc(pt1, pt2, dR)
edge_stack.append(kT)
if ('z' in self.edge_list):
z = self.z_calc(pt1, pt2)
edge_stack.append(z)
if ('m2' in self.edge_list):
p1 = p4[:, edge_idx[:, 0], :]
p2 = p4[:, edge_idx[:, 1], :]
m2 = self.mass2_calc(p1, p2)
edge_stack.append(m2)
ef = np.stack(edge_stack, axis=-1)

Xc = [Xc1, Xc2]
# dimension parameter for keras model
self.emb_input_dim = {i: int(np.max(Xc[i][0:1000])) + 1 for i in range(self.n_features_pf_cat)}

# Prepare training/val data
Yr = Y
Xr = [Xi, Xp] + Xc + [ef]
return Xr, Yr

# Prepare training/val data
Yr = Y
Xr = [Xi, Xp] + Xc
else:
Xc = [Xc1, Xc2]
# dimension parameter for keras model
self.emb_input_dim = {i: int(np.max(Xc[i][0:1000])) + 1 for i in range(self.n_features_pf_cat)}

return Xr, Yr
# Prepare training/val data
Yr = Y
Xr = [Xi, Xp] + Xc
return Xr, Yr

def __get_features_labels(self, ifile, entry_start, entry_stop):
'Loads data from one file'
@@ -124,4 +201,11 @@ def __get_features_labels(self, ifile, entry_start, entry_stop):
X = h5_file['X'][entry_start:entry_stop+1]
y = h5_file['Y'][entry_start:entry_stop+1]

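# Keep only the maxNPF highest-pT candidates: argsort each event by pT (feature 0) in descending order, reorder, then truncate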
if self.maxNPF < 100:
order = X[:, :, 0].argsort(axis=1)[:, ::-1]
shape = np.shape(X)
for x in range(shape[0]):
X[x, :, :] = X[x, order[x], :]
X = X[:, 0:self.maxNPF, :]

return X, y
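For reference, below is a minimal standalone sketch of the edge-feature math added in `__data_generation` (deltaR with phi wrap-around, kT, and z). The batch size, particle count, and random values are illustrative assumptions, not part of the PR; the `m2` feature follows the same pairing, with m^2 = (E_i+E_j)^2 - |p_i+p_j|^2 and E = |p| for massless candidates.

```python
import itertools
import numpy as np

batch, N = 2, 4                              # toy batch and particle count (maxNPF)
rng = np.random.default_rng(0)
pt = rng.uniform(1.0, 50.0, size=(batch, N))
eta = rng.uniform(-2.5, 2.5, size=(batch, N))
phi = rng.uniform(-np.pi, np.pi, size=(batch, N))

# All ordered (receiver, sender) pairs without self-loops: N*(N-1) edges
edge_idx = np.array([ij for ij in itertools.product(range(N), range(N)) if ij[0] != ij[1]])

dphi = phi[:, edge_idx[:, 0]] - phi[:, edge_idx[:, 1]]
dphi[dphi > np.pi] -= 2 * np.pi              # wrap dphi into (-pi, pi]
dphi[dphi < -np.pi] += 2 * np.pi
deta = eta[:, edge_idx[:, 0]] - eta[:, edge_idx[:, 1]]
dR = np.hypot(deta, dphi)                    # deltaR_calc

pt1 = pt[:, edge_idx[:, 0]]
pt2 = pt[:, edge_idx[:, 1]]
kT = np.minimum(pt1, pt2) * dR               # kT_calc: softer pT scaled by deltaR
z = np.minimum(pt1, pt2) / (pt1 + pt2 + 1e-12)  # z_calc: momentum-sharing fraction

ef = np.stack([dR, kT, z], axis=-1)
print(ef.shape)                              # (2, 12, 3) == (batch, N*(N-1), n_edge_features)
```

This mirrors what the generator stacks into `ef` when `compute_ef == 1` and the corresponding names appear in `edge_list`.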
7 changes: 5 additions & 2 deletions Dockerfile
@@ -6,12 +6,15 @@ LABEL maintainer="Javier Duarte <[email protected]>"

USER root

RUN apt-get update && apt-get -y install openssh-client
RUN apt-get update \
&& apt-get -yq --no-install-recommends install openssh-client vim emacs \
&& apt-get clean && rm -rf /var/lib/apt/lists/*

USER ${NB_UID}

# Install Tensorflow
RUN pip install --quiet --no-cache-dir \
coffea \
uproot \
awkward \
uproot \
@@ -20,5 +23,5 @@ RUN pip install --quiet --no-cache-dir \
mplhep \
autopep8 \
git+https://github.com/google/qkeras#egg=qkeras \
git+https://github.com/jmduarte/hls4ml@l1metml#egg=hls4ml[profiling]
git+https://github.com/jmduarte/hls4ml@split_pointwise_conv_by_rf#egg=hls4ml[profiling]

10 changes: 10 additions & 0 deletions README.md
@@ -31,3 +31,13 @@ For example,
```bash
python train.py --workflowType dataGenerator --input ./path/to/files/ --output ./path/to/result/ --mode 1 --epochs --quantized 8 2 --units 12 36
```

### Test
You need output results for both the TTbar and SingleNeutrino input samples.
When using the SingleNeutrino sample as input, change 'TTbar' to 'SingleNeutrino' in the test function in train.py (L66-67).

The SingleNeutrino sample is available at: https://cernbox.cern.ch/index.php/s/5inLVZpXreq1vOx

```bash
python rate_test.py --input [path to input files (output path of train.py)] --plot [ROC, rate, rate_com]
```
165 changes: 165 additions & 0 deletions convert_full_model.py
@@ -0,0 +1,165 @@
import tensorflow
from models import dense_embedding
from tensorflow.keras.layers import Input, Concatenate
from tensorflow.keras.models import Model
import numpy as np
import hls4ml
import pandas as pd
from qkeras.utils import _add_supported_quantized_objects
from models import dense_embedding, dense_embedding_quantized
from utils import preProcessing
import h5py
import scipy
import os

co = {}
_add_supported_quantized_objects(co)


def print_dict(d, indent=0):
align = 20
for key, value in d.items():
print(' ' * indent + str(key), end='')
if isinstance(value, dict):
print()
print_dict(value, indent+1)
else:
print(':' + ' ' * (align - len(key) - 2 * indent) + str(value))


# load full model:
model_name = 'trained_DeepMET'
# model_name = 'trained_quantized_DeepMET'
# model_name = 'trained_quantized_DeepMET_normfac1000'
model = tensorflow.keras.models.load_model(f'models/baseline_DeepMET{"_quantized" if "quantized" in model_name else ""}/{model_name}.h5', compile=False, custom_objects=co)

reuse_factor = 1
precision = 'ap_fixed<32,16>'
io_type = 'io_parallel'
strategy = 'Latency'
output_dir = 'hls_output_{}_{}_{}_rf{}_{}'.format(model_name, io_type, strategy, reuse_factor, precision)
batch_size = 1
synth = False
trace = True
normFac = 1

# check everything works
model.summary()
os.makedirs(output_dir, exist_ok=True)
model.save('{}/model.h5'.format(output_dir))

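# granularity='name' yields a per-layer config, so individual layers can be overridden below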
config = hls4ml.utils.config_from_keras_model(model,
granularity='name',
default_reuse_factor=reuse_factor,
default_precision=precision)
config['Model']['Strategy'] = strategy
for name in config['LayerName'].keys():
config['LayerName'][name]['Trace'] = trace
config['LayerName']['input_cat0']['Precision']['result'] = 'ap_uint<4>'
config['LayerName']['input_cat1']['Precision']['result'] = 'ap_uint<4>'
# config['LayerName']['input_cont']['Precision']['result'] = 'ap_fixed<20,10>'
#if 'q_dense' in config['LayerName']:
# config['LayerName']['q_dense']['Precision']['accum'] = 'ap_fixed<32,16>'
# config['LayerName']['q_dense']['Precision']['weight'] = 'ap_fixed<32,16>'
# config['LayerName']['q_dense']['Precision']['bias'] = 'ap_fixed<32,16>'
# config['LayerName']['q_dense_1']['Precision']['accum'] = 'ap_fixed<32,16>'
# config['LayerName']['q_dense_1']['Precision']['weight'] = 'ap_fixed<32,16>'
# config['LayerName']['q_dense_1']['Precision']['bias'] = 'ap_fixed<32,16>'
config['LayerName']['multiply']['n_elem'] = 100
config['LayerName']['output']['n_filt'] = 2
# skip optimize_pointwise_conv
# config['SkipOptimizers'] = ['optimize_pointwise_conv']
# for layer in config['LayerName'].keys():
# config['LayerName'][layer]['Trace'] = True

print("-----------------------------------")
print_dict(config)
print("-----------------------------------")
hls_model = hls4ml.converters.convert_from_keras_model(model,
hls_config=config,
io_type=io_type,
output_dir=output_dir,
part='xcvu13p-flga2577-2-e',
clock_period=5,
project_name='L1METML_v1',
)
hls_model.compile()

hls4ml.utils.plot_model(hls_model, show_shapes=True, show_precision=True, to_file='{}/model_hls4ml.png'.format(output_dir))

if synth:
hls_model.build(synth=synth)
hls4ml.report.read_vivado_report(output_dir)

f = h5py.File('data/test_data.h5')
# 1000 test events is good enough
X = f['X'][:1000]
y = -f['Y'][:1000]

# preprocessing
X_pre = list(preProcessing(X, normFac=normFac))
X_pre = [np.ascontiguousarray(x) for x in X_pre]

y_pred = model.predict(X_pre)
y_hls = hls_model.predict(X_pre)

met = np.hypot(y[:, 0], y[:, 1])
met_pred = np.hypot(y_pred[:, 0], y_pred[:, 1]) * normFac
met_hls = np.hypot(y_hls[:, 0], y_hls[:, 1]) * normFac
met_pup_x = np.sum(X[:, :, 1], axis=-1)
met_pup_y = np.sum(X[:, :, 2], axis=-1)
met_pup = np.hypot(met_pup_x, met_pup_y)

import seaborn
import matplotlib.pyplot as plt

df = pd.DataFrame.from_dict({'Gen MET': met, 'PUPPI MET': met_pup, 'QKeras MET': met_pred, 'hls4ml MET': met_hls})
plt.figure()
seaborn.pairplot(df, corner=True)
plt.savefig(f'{output_dir}/profiling_MET.png', dpi=300)

df = pd.DataFrame.from_dict({'Gen MET x': y[:, 0], 'PUPPI MET x': met_pup_x, 'QKeras MET x': y_pred[:, 0], 'hls4ml MET x': y_hls[:, 0]})
plt.figure()
seaborn.pairplot(df, corner=True)
plt.savefig(f'{output_dir}/profiling_MET_x.png', dpi=300)

df = pd.DataFrame.from_dict({'Gen MET y': y[:, 1], 'PUPPI MET y': met_pup_y, 'QKeras MET y': y_pred[:, 1], 'hls4ml MET y': y_hls[:, 1]})
plt.figure()
seaborn.pairplot(df, corner=True)
plt.savefig(f'{output_dir}/profiling_MET_y.png', dpi=300)

response_pup = met_pup / met
response_pred = met_pred / met
response_hls = met_hls / met
bins = np.linspace(0, 2, 25)
plt.figure(figsize=(12, 5))
plt.subplot(1, 3, 1)
plt.hist(response_pup, bins=bins, label=f'PUPPI, median={np.median(response_pup):0.2f}, IQR={scipy.stats.iqr(response_pup):0.2f}')
plt.legend()
plt.xlabel(r"MET response $\hat{y}/y$")
plt.ylabel("Events")
plt.subplot(1, 3, 2)
plt.hist(response_pred, bins=bins, label=f'QKeras, median={np.median(response_pred):0.2f}, IQR={scipy.stats.iqr(response_pred):0.2f}')
plt.legend()
plt.xlabel(r"MET response $\hat{y}/y$")
plt.ylabel("Events")
plt.subplot(1, 3, 3)
plt.hist(response_hls, bins=bins, label=f'hls4ml, median={np.median(response_hls):0.2f}, IQR={scipy.stats.iqr(response_hls):0.2f}')
plt.legend()
plt.xlabel(r"MET response $\hat{y}/y$")
plt.ylabel("Events")
plt.tight_layout()
plt.savefig(f"{output_dir}/response_MET.png", dpi=300)

y_hls, hls4ml_trace = hls_model.trace(X_pre)
keras_trace = hls4ml.model.profiling.get_ymodel_keras(model, X_pre)

for layer in hls4ml_trace.keys():
if layer not in keras_trace: continue
plt.figure()
plt.scatter(hls4ml_trace[layer].flatten(), keras_trace[layer].flatten(), s=0.2)
min_x = min(np.amin(hls4ml_trace[layer]), np.amin(keras_trace[layer]))
max_x = max(np.amax(hls4ml_trace[layer]), np.amax(keras_trace[layer]))
plt.plot([min_x, max_x], [min_x, max_x], c='gray')
plt.xlabel(f'hls4ml {layer}')
plt.ylabel(f'QKeras {layer}')
plt.savefig(f'{output_dir}/profiling_{layer}.png', dpi=300)