Pull request #2 (Open): wants to merge 8 commits into base `master`
9 changes: 6 additions & 3 deletions .github/workflows/test.yml
@@ -5,11 +5,14 @@ jobs:
test:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v4
# Install dependencies
- name: Setup Miniconda
uses: conda-incubator/setup-miniconda@v2
uses: conda-incubator/setup-miniconda@v3
with:
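# Miniforge defaults to the conda-forge channel; mamba is a faster, conda-compatible solver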
miniforge-version: latest
use-mamba: true
channels: conda-forge
activate-environment: l1metml
environment-file: environment.yml
python-version: 3.9
@@ -29,4 +32,4 @@ jobs:
- name: Run autopep8
shell: bash -l {0}
run: |
autopep8 --in-place --aggressive --aggressive --recursive . --exit-code
autopep8 --in-place --aggressive --aggressive --recursive . --exit-code
104 changes: 94 additions & 10 deletions DataGenerator.py
@@ -7,13 +7,14 @@
from utils import convertXY2PtPhi, preProcessing, to_np_array
import h5py
import os
import itertools


class DataGenerator(tensorflow.keras.utils.Sequence):
'Generates data for Keras'

def __init__(self, list_files, batch_size=1024, n_dim=100,
max_entry=100000000):
def __init__(self, list_files, batch_size=1024, n_dim=100, maxNPF=100, compute_ef=0,
max_entry=100000000, edge_list=[]):
'Initialization'
self.n_features_pf = 6
self.n_features_pf_cat = 2
@@ -26,6 +27,9 @@ def __init__(self, list_files, batch_size=1024, n_dim=100,
self.file_mapping = []
self.max_entry = max_entry
self.open_files = [None]*len(list_files)
self.maxNPF = maxNPF
self.compute_ef = compute_ef
self.edge_list = edge_list
running_total = 0

self.h5files = []
@@ -57,7 +61,6 @@ def __getitem__(self, index):
# Generate indexes of the batch
indexes = self.indexes[index*self.batch_size:(index+1)*self.batch_size]
files = self.file_mapping[index*self.batch_size:(index+1)*self.batch_size]

unique_files = np.unique(files)
starts = np.array([min(indexes[files == i]) for i in unique_files])
stops = np.array([max(indexes[files == i]) for i in unique_files])
@@ -80,6 +83,32 @@ def on_epoch_end(self):
'Updates indexes after each epoch'
self.indexes = self.local_IDs

def deltaR_calc(self, eta1, phi1, eta2, phi2):
""" calculate deltaR """
dphi = (phi1-phi2)
gt_pi_idx = (dphi > np.pi)
lt_pi_idx = (dphi < -np.pi)
dphi[gt_pi_idx] -= 2*np.pi
dphi[lt_pi_idx] += 2*np.pi
deta = eta1-eta2
return np.hypot(deta, dphi)

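# kT of a pair: pT of the softer candidate scaled by its angular separation deltaR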
def kT_calc(self, pti, ptj, dR):
min_pt = np.minimum(pti, ptj)
kT = min_pt * dR
return kT

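# z of a pair: momentum-sharing fraction of the softer candidate, regularized by epsilon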
def z_calc(self, pti, ptj):
epsilon = 1.0e-12
min_pt = np.minimum(pti, ptj)
z = min_pt/(pti + ptj + epsilon)
return z

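# invariant mass squared of the pair, with four-vectors ordered as (E, px, py, pz)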
def mass2_calc(self, pi, pj):
pij = pi + pj
m2 = pij[:, :, 0]**2 - pij[:, :, 1]**2 - pij[:, :, 2]**2 - pij[:, :, 3]**2
return m2

def __data_generation(self, unique_files, starts, stops):
'Generates data containing batch_size samples'
# X : (n_samples, n_dim, n_channels)
@@ -102,15 +131,63 @@ def __data_generation(self, unique_files, starts, stops):
Y = self.y / (-self.normFac)
Xi, Xp, Xc1, Xc2 = preProcessing(self.X, self.normFac)

Xc = [Xc1, Xc2]
# dimension parameter for keras model
self.emb_input_dim = {i: int(np.max(Xc[i][0:1000])) + 1 for i in range(self.n_features_pf_cat)}
N = self.maxNPF
Nr = N*(N-1)

if self.compute_ef == 1:
eta = Xi[:, :, 1]
phi = Xi[:, :, 2]
pt = Xi[:, :, 0]
if ('m2' in self.edge_list):
px = Xp[:, :, 0]
py = Xp[:, :, 1]
pz = pt*np.sinh(eta)
energy = np.sqrt(px**2 + py**2 + pz**2)
p4 = np.stack((energy, px, py, pz), axis=-1)
receiver_sender_list = [i for i in itertools.product(range(N), range(N)) if i[0] != i[1]]
edge_idx = np.array(receiver_sender_list)
edge_stack = []
if ('dR' in self.edge_list) or ('kT' in self.edge_list):
eta1 = eta[:, edge_idx[:, 0]]
phi1 = phi[:, edge_idx[:, 0]]
eta2 = eta[:, edge_idx[:, 1]]
phi2 = phi[:, edge_idx[:, 1]]
dR = self.deltaR_calc(eta1, phi1, eta2, phi2)
edge_stack.append(dR)
if ('kT' in self.edge_list) or ('z' in self.edge_list):
pt1 = pt[:, edge_idx[:, 0]]
pt2 = pt[:, edge_idx[:, 1]]
if ('kT' in self.edge_list):
kT = self.kT_calc(pt1, pt2, dR)
edge_stack.append(kT)
if ('z' in self.edge_list):
z = self.z_calc(pt1, pt2)
edge_stack.append(z)
if ('m2' in self.edge_list):
p1 = p4[:, edge_idx[:, 0], :]
p2 = p4[:, edge_idx[:, 1], :]
m2 = self.mass2_calc(p1, p2)
edge_stack.append(m2)
ef = np.stack(edge_stack, axis=-1)

Xc = [Xc1, Xc2]
# dimension parameter for keras model
self.emb_input_dim = {i: int(np.max(Xc[i][0:1000])) + 1 for i in range(self.n_features_pf_cat)}

# Prepare training/val data
Yr = Y
Xr = [Xi, Xp] + Xc + [ef]
return Xr, Yr

# Prepare training/val data
Yr = Y
Xr = [Xi, Xp] + Xc
else:
Xc = [Xc1, Xc2]
# dimension parameter for keras model
self.emb_input_dim = {i: int(np.max(Xc[i][0:1000])) + 1 for i in range(self.n_features_pf_cat)}

return Xr, Yr
# Prepare training/val data
Yr = Y
Xr = [Xi, Xp] + Xc
return Xr, Yr

def __get_features_labels(self, ifile, entry_start, entry_stop):
'Loads data from one file'
@@ -124,4 +201,11 @@ def __get_features_labels(self, ifile, entry_start, entry_stop):
X = h5_file['X'][entry_start:entry_stop+1]
y = h5_file['Y'][entry_start:entry_stop+1]

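# Keep only the maxNPF highest-pT candidates: argsort each event by pT (feature 0) in descending order, reorder, then truncate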
if self.maxNPF < 100:
order = X[:, :, 0].argsort(axis=1)[:, ::-1]
shape = np.shape(X)
for x in range(shape[0]):
X[x, :, :] = X[x, order[x], :]
X = X[:, 0:self.maxNPF, :]

return X, y
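For reference, below is a minimal standalone sketch of the edge-feature math added in `__data_generation` (deltaR with phi wrap-around, kT, and z). The batch size, particle count, and random values are illustrative assumptions, not part of the PR; the `m2` feature follows the same pairing, with m^2 = (E_i+E_j)^2 - |p_i+p_j|^2 and E = |p| for massless candidates.

```python
import itertools
import numpy as np

batch, N = 2, 4                              # toy batch and particle count (maxNPF)
rng = np.random.default_rng(0)
pt = rng.uniform(1.0, 50.0, size=(batch, N))
eta = rng.uniform(-2.5, 2.5, size=(batch, N))
phi = rng.uniform(-np.pi, np.pi, size=(batch, N))

# All ordered (receiver, sender) pairs without self-loops: N*(N-1) edges
edge_idx = np.array([ij for ij in itertools.product(range(N), range(N)) if ij[0] != ij[1]])

dphi = phi[:, edge_idx[:, 0]] - phi[:, edge_idx[:, 1]]
dphi[dphi > np.pi] -= 2 * np.pi              # wrap dphi into (-pi, pi]
dphi[dphi < -np.pi] += 2 * np.pi
deta = eta[:, edge_idx[:, 0]] - eta[:, edge_idx[:, 1]]
dR = np.hypot(deta, dphi)                    # deltaR_calc

pt1 = pt[:, edge_idx[:, 0]]
pt2 = pt[:, edge_idx[:, 1]]
kT = np.minimum(pt1, pt2) * dR               # kT_calc: softer pT scaled by deltaR
z = np.minimum(pt1, pt2) / (pt1 + pt2 + 1e-12)  # z_calc: momentum-sharing fraction

ef = np.stack([dR, kT, z], axis=-1)
print(ef.shape)                              # (2, 12, 3) == (batch, N*(N-1), n_edge_features)
```

This mirrors what the generator stacks into `ef` when `compute_ef == 1` and the corresponding names appear in `edge_list`.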
7 changes: 5 additions & 2 deletions Dockerfile
@@ -6,12 +6,15 @@ LABEL maintainer="Javier Duarte <[email protected]>"

USER root

RUN apt-get update && apt-get -y install openssh-client
RUN apt-get update \
&& apt-get -yq --no-install-recommends install openssh-client vim emacs \
&& apt-get clean && rm -rf /var/lib/apt/lists/*

USER ${NB_UID}

# Install Tensorflow
RUN pip install --quiet --no-cache-dir \
coffea \
uproot \
awkward \
uproot \
@@ -20,5 +23,5 @@ RUN pip install --quiet --no-cache-dir \
mplhep \
autopep8 \
git+https://github.com/google/qkeras#egg=qkeras \
git+https://github.com/jmduarte/hls4ml@l1metml#egg=hls4ml[profiling]
git+https://github.com/jmduarte/hls4ml@split_pointwise_conv_by_rf#egg=hls4ml[profiling]

10 changes: 10 additions & 0 deletions README.md
@@ -31,3 +31,13 @@ For example,
```bash
python train.py --workflowType dataGenerator --input ./path/to/files/ --output ./path/to/result/ --mode 1 --epochs --quantized 8 2 --units 12 36
```

### Test
You need output results for both the TTbar and SingleNeutrino input samples.
When using the SingleNeutrino sample as input, change 'TTbar' to 'SingleNeutrino' in the test function in train.py (L66-67).

The SingleNeutrino sample is available at: https://cernbox.cern.ch/index.php/s/5inLVZpXreq1vOx

```bash
python rate_test.py --input [path to input files (output path of train.py)] --plot [ROC, rate, rate_com]
```
165 changes: 165 additions & 0 deletions convert_full_model.py
@@ -0,0 +1,165 @@
import tensorflow
from models import dense_embedding
from tensorflow.keras.layers import Input, Concatenate
from tensorflow.keras.models import Model
import numpy as np
import hls4ml
import pandas as pd
from qkeras.utils import _add_supported_quantized_objects
from models import dense_embedding, dense_embedding_quantized
from utils import preProcessing
import h5py
import scipy
import os

co = {}
_add_supported_quantized_objects(co)


def print_dict(d, indent=0):
align = 20
for key, value in d.items():
print(' ' * indent + str(key), end='')
if isinstance(value, dict):
print()
print_dict(value, indent+1)
else:
print(':' + ' ' * (align - len(key) - 2 * indent) + str(value))


# load full model:
model_name = 'trained_DeepMET'
# model_name = 'trained_quantized_DeepMET'
# model_name = 'trained_quantized_DeepMET_normfac1000'
model = tensorflow.keras.models.load_model(f'models/baseline_DeepMET{"_quantized" if "quantized" in model_name else ""}/{model_name}.h5', compile=False, custom_objects=co)

reuse_factor = 1
precision = 'ap_fixed<32,16>'
io_type = 'io_parallel'
strategy = 'Latency'
output_dir = 'hls_output_{}_{}_{}_rf{}_{}'.format(model_name, io_type, strategy, reuse_factor, precision)
batch_size = 1
synth = False
trace = True
normFac = 1

# check everything works
model.summary()
os.makedirs(output_dir, exist_ok=True)
model.save('{}/model.h5'.format(output_dir))

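# granularity='name' yields a per-layer config, so individual layers can be overridden below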
config = hls4ml.utils.config_from_keras_model(model,
granularity='name',
default_reuse_factor=reuse_factor,
default_precision=precision)
config['Model']['Strategy'] = strategy
for name in config['LayerName'].keys():
config['LayerName'][name]['Trace'] = trace
config['LayerName']['input_cat0']['Precision']['result'] = 'ap_uint<4>'
config['LayerName']['input_cat1']['Precision']['result'] = 'ap_uint<4>'
# config['LayerName']['input_cont']['Precision']['result'] = 'ap_fixed<20,10>'
#if 'q_dense' in config['LayerName']:
# config['LayerName']['q_dense']['Precision']['accum'] = 'ap_fixed<32,16>'
# config['LayerName']['q_dense']['Precision']['weight'] = 'ap_fixed<32,16>'
# config['LayerName']['q_dense']['Precision']['bias'] = 'ap_fixed<32,16>'
# config['LayerName']['q_dense_1']['Precision']['accum'] = 'ap_fixed<32,16>'
# config['LayerName']['q_dense_1']['Precision']['weight'] = 'ap_fixed<32,16>'
# config['LayerName']['q_dense_1']['Precision']['bias'] = 'ap_fixed<32,16>'
config['LayerName']['multiply']['n_elem'] = 100
config['LayerName']['output']['n_filt'] = 2
# skip optimize_pointwise_conv
# config['SkipOptimizers'] = ['optimize_pointwise_conv']
# for layer in config['LayerName'].keys():
# config['LayerName'][layer]['Trace'] = True

print("-----------------------------------")
print_dict(config)
print("-----------------------------------")
hls_model = hls4ml.converters.convert_from_keras_model(model,
hls_config=config,
io_type=io_type,
output_dir=output_dir,
part='xcvu13p-flga2577-2-e',
clock_period=5,
project_name='L1METML_v1',
)
hls_model.compile()

hls4ml.utils.plot_model(hls_model, show_shapes=True, show_precision=True, to_file='{}/model_hls4ml.png'.format(output_dir))

if synth:
hls_model.build(synth=synth)
hls4ml.report.read_vivado_report(output_dir)

f = h5py.File('data/test_data.h5')
# 1000 test events is good enough
X = f['X'][:1000]
y = -f['Y'][:1000]

# preprocessing
X_pre = list(preProcessing(X, normFac=normFac))
X_pre = [np.ascontiguousarray(x) for x in X_pre]

y_pred = model.predict(X_pre)
y_hls = hls_model.predict(X_pre)

met = np.hypot(y[:, 0], y[:, 1])
met_pred = np.hypot(y_pred[:, 0], y_pred[:, 1]) * normFac
met_hls = np.hypot(y_hls[:, 0], y_hls[:, 1]) * normFac
met_pup_x = np.sum(X[:, :, 1], axis=-1)
met_pup_y = np.sum(X[:, :, 2], axis=-1)
met_pup = np.hypot(met_pup_x, met_pup_y)

import seaborn
import matplotlib.pyplot as plt

df = pd.DataFrame.from_dict({'Gen MET': met, 'PUPPI MET': met_pup, 'QKeras MET': met_pred, 'hls4ml MET': met_hls})
plt.figure()
seaborn.pairplot(df, corner=True)
plt.savefig(f'{output_dir}/profiling_MET.png', dpi=300)

df = pd.DataFrame.from_dict({'Gen MET x': y[:, 0], 'PUPPI MET x': met_pup_x, 'QKeras MET x': y_pred[:, 0], 'hls4ml MET x': y_hls[:, 0]})
plt.figure()
seaborn.pairplot(df, corner=True)
plt.savefig(f'{output_dir}/profiling_MET_x.png', dpi=300)

df = pd.DataFrame.from_dict({'Gen MET y': y[:, 1], 'PUPPI MET y': met_pup_y, 'QKeras MET y': y_pred[:, 1], 'hls4ml MET y': y_hls[:, 1]})
plt.figure()
seaborn.pairplot(df, corner=True)
plt.savefig(f'{output_dir}/profiling_MET_y.png', dpi=300)

response_pup = met_pup / met
response_pred = met_pred / met
response_hls = met_hls / met
bins = np.linspace(0, 2, 25)
plt.figure(figsize=(12, 5))
plt.subplot(1, 3, 1)
plt.hist(response_pup, bins=bins, label=f'PUPPI, median={np.median(response_pup):0.2f}, IQR={scipy.stats.iqr(response_pup):0.2f}')
plt.legend()
plt.xlabel(r"MET response $\hat{y}/y$")
plt.ylabel("Events")
plt.subplot(1, 3, 2)
plt.hist(response_pred, bins=bins, label=f'QKeras, median={np.median(response_pred):0.2f}, IQR={scipy.stats.iqr(response_pred):0.2f}')
plt.legend()
plt.xlabel(r"MET response $\hat{y}/y$")
plt.ylabel("Events")
plt.subplot(1, 3, 3)
plt.hist(response_hls, bins=bins, label=f'hls4ml, median={np.median(response_hls):0.2f}, IQR={scipy.stats.iqr(response_hls):0.2f}')
plt.legend()
plt.xlabel(r"MET response $\hat{y}/y$")
plt.ylabel("Events")
plt.tight_layout()
plt.savefig(f"{output_dir}/response_MET.png", dpi=300)

y_hls, hls4ml_trace = hls_model.trace(X_pre)
keras_trace = hls4ml.model.profiling.get_ymodel_keras(model, X_pre)

for layer in hls4ml_trace.keys():
if layer not in keras_trace: continue
plt.figure()
plt.scatter(hls4ml_trace[layer].flatten(), keras_trace[layer].flatten(), s=0.2)
min_x = min(np.amin(hls4ml_trace[layer]), np.amin(keras_trace[layer]))
max_x = max(np.amax(hls4ml_trace[layer]), np.amax(keras_trace[layer]))
plt.plot([min_x, max_x], [min_x, max_x], c='gray')
plt.xlabel(f'hls4ml {layer}')
plt.ylabel(f'QKeras {layer}')
plt.savefig(f'{output_dir}/profiling_{layer}.png', dpi=300)