From a603778acdbbaa9d12a6d44ca28cee712dd5b27e Mon Sep 17 00:00:00 2001
From: Farouk
Date: Mon, 2 Dec 2024 19:13:34 +0100
Subject: [PATCH] linting

---
 mlpf/data/key4hep/postprocessing.py | 71 +++++++----------------------
 mlpf/model/PFDataset.py             | 16 ++-----
 2 files changed, 21 insertions(+), 66 deletions(-)

diff --git a/mlpf/data/key4hep/postprocessing.py b/mlpf/data/key4hep/postprocessing.py
index 5d7ce8df8..e4b9fc9a1 100644
--- a/mlpf/data/key4hep/postprocessing.py
+++ b/mlpf/data/key4hep/postprocessing.py
@@ -182,9 +182,7 @@ def __init__(
         self.cluster_features = cluster_features  # feature matrix of the calo clusters
         self.track_features = track_features  # feature matrix of the tracks
         self.genparticle_to_hit = genparticle_to_hit  # sparse COO matrix of genparticles to hits (idx_gp, idx_hit, weight)
-        self.genparticle_to_track = (
-            genparticle_to_track  # sparse COO matrix of genparticles to tracks (idx_gp, idx_track, weight)
-        )
+        self.genparticle_to_track = genparticle_to_track  # sparse COO matrix of genparticles to tracks (idx_gp, idx_track, weight)
         self.hit_to_cluster = hit_to_cluster  # sparse COO matrix of hits to clusters (idx_hit, idx_cluster, weight)
         self.gp_merges = gp_merges  # sparse COO matrix of any merged genparticles
 
@@ -250,10 +248,7 @@ def get_calohit_matrix_and_genadj(dataset, hit_data, calohit_links, iev, collect
                 hit_idx_global += 1
     hit_idx_local_to_global = {v: k for k, v in hit_idx_global_to_local.items()}
     hit_feature_matrix = awkward.Record(
-        {
-            k: awkward.concatenate([hit_feature_matrix[i][k] for i in range(len(hit_feature_matrix))])
-            for k in hit_feature_matrix[0].fields
-        }
+        {k: awkward.concatenate([hit_feature_matrix[i][k] for i in range(len(hit_feature_matrix))]) for k in hit_feature_matrix[0].fields}
     )
 
     # add all edges from genparticle to calohit
@@ -348,9 +343,7 @@ def gen_to_features(dataset, prop_data, iev):
     gen_arr = {k.replace(mc_coll + ".", ""): gen_arr[k] for k in gen_arr.fields}
 
     MCParticles_p4 = vector.awk(
-        awkward.zip(
-            {"mass": gen_arr["mass"], "x": gen_arr["momentum.x"], "y": gen_arr["momentum.y"], "z": gen_arr["momentum.z"]}
-        )
+        awkward.zip({"mass": gen_arr["mass"], "x": gen_arr["momentum.x"], "y": gen_arr["momentum.y"], "z": gen_arr["momentum.z"]})
     )
     gen_arr["pt"] = MCParticles_p4.pt
     gen_arr["eta"] = MCParticles_p4.eta
@@ -535,9 +528,7 @@ def track_to_features(dataset, prop_data, iev):
         if dataset == "clic":
             ret[k] = awkward.to_numpy(prop_data["SiTracks_1"]["SiTracks_1." + k][iev][trackstate_idx])
         elif dataset == "fcc":
-            ret[k] = awkward.to_numpy(
-                prop_data["_SiTracks_Refitted_trackStates"]["_SiTracks_Refitted_trackStates." + k][iev][trackstate_idx]
-            )
+            ret[k] = awkward.to_numpy(prop_data["_SiTracks_Refitted_trackStates"]["_SiTracks_Refitted_trackStates." + k][iev][trackstate_idx])
         else:
             raise Exception("--dataset provided is not supported. Only 'fcc' or 'clic' are supported atm.")
 
@@ -626,9 +617,7 @@ def add_daughters_to_status1(gen_features, genparticle_to_hit, genparticle_to_tr
 
 def get_genparticles_and_adjacencies(dataset, prop_data, hit_data, calohit_links, sitrack_links, iev, collectionIDs):
     gen_features = gen_to_features(dataset, prop_data, iev)
-    hit_features, genparticle_to_hit, hit_idx_local_to_global = get_calohit_matrix_and_genadj(
-        dataset, hit_data, calohit_links, iev, collectionIDs
-    )
+    hit_features, genparticle_to_hit, hit_idx_local_to_global = get_calohit_matrix_and_genadj(dataset, hit_data, calohit_links, iev, collectionIDs)
     hit_to_cluster = hit_cluster_adj(dataset, prop_data, hit_idx_local_to_global, iev)
     cluster_features = cluster_to_features(prop_data, hit_features, hit_to_cluster, iev)
     track_features = track_to_features(dataset, prop_data, iev)
@@ -638,9 +627,7 @@ def get_genparticles_and_adjacencies(dataset, prop_data, hit_data, calohit_links
 
     mask_status1 = gen_features["generatorStatus"] == 1
     if gen_features["index"] is not None:  # if there are even daughters
-        genparticle_to_hit, genparticle_to_trk = add_daughters_to_status1(
-            gen_features, genparticle_to_hit, genparticle_to_trk
-        )
+        genparticle_to_hit, genparticle_to_trk = add_daughters_to_status1(gen_features, genparticle_to_hit, genparticle_to_trk)
 
     n_gp = awkward.count(gen_features["PDG"])
     n_track = awkward.count(track_features["type"])
@@ -648,11 +635,7 @@ def get_genparticles_and_adjacencies(dataset, prop_data, hit_data, calohit_links
     n_cluster = awkward.count(cluster_features["type"])
 
     if len(genparticle_to_trk[0]) > 0:
-        gp_to_track = (
-            coo_matrix((genparticle_to_trk[2], (genparticle_to_trk[0], genparticle_to_trk[1])), shape=(n_gp, n_track))
-            .max(axis=1)
-            .todense()
-        )
+        gp_to_track = coo_matrix((genparticle_to_trk[2], (genparticle_to_trk[0], genparticle_to_trk[1])), shape=(n_gp, n_track)).max(axis=1).todense()
     else:
         gp_to_track = np.zeros((n_gp, 1))
 
@@ -682,9 +665,7 @@ def get_genparticles_and_adjacencies(dataset, prop_data, hit_data, calohit_links
 
     if len(np.array(mask_visible)) == 1:
         # event has only one particle (then index will be empty because no daughters)
-        gen_features = awkward.Record(
-            {feat: (gen_features[feat][mask_visible] if feat != "index" else None) for feat in gen_features.keys()}
-        )
+        gen_features = awkward.Record({feat: (gen_features[feat][mask_visible] if feat != "index" else None) for feat in gen_features.keys()})
     else:
         gen_features = awkward.Record({feat: gen_features[feat][mask_visible] for feat in gen_features.keys()})
 
@@ -717,12 +698,8 @@ def assign_genparticles_to_obj_and_merge(gpdata):
         ).todense()
     )
 
-    gp_to_calohit = coo_matrix(
-        (gpdata.genparticle_to_hit[2], (gpdata.genparticle_to_hit[0], gpdata.genparticle_to_hit[1])), shape=(n_gp, n_hit)
-    )
-    calohit_to_cluster = coo_matrix(
-        (gpdata.hit_to_cluster[2], (gpdata.hit_to_cluster[0], gpdata.hit_to_cluster[1])), shape=(n_hit, n_cluster)
-    )
+    gp_to_calohit = coo_matrix((gpdata.genparticle_to_hit[2], (gpdata.genparticle_to_hit[0], gpdata.genparticle_to_hit[1])), shape=(n_gp, n_hit))
+    calohit_to_cluster = coo_matrix((gpdata.hit_to_cluster[2], (gpdata.hit_to_cluster[0], gpdata.hit_to_cluster[1])), shape=(n_hit, n_cluster))
 
     gp_to_cluster = np.array((gp_to_calohit * calohit_to_cluster).todense())
 
@@ -902,9 +879,7 @@ def get_reco_properties(dataset, prop_data, iev):
         raise Exception("--dataset provided is not supported. Only 'fcc' or 'clic' are supported atm.")
 
     reco_p4 = vector.awk(
-        awkward.zip(
-            {"mass": reco_arr["mass"], "x": reco_arr["momentum.x"], "y": reco_arr["momentum.y"], "z": reco_arr["momentum.z"]}
-        )
+        awkward.zip({"mass": reco_arr["mass"], "x": reco_arr["momentum.x"], "y": reco_arr["momentum.y"], "z": reco_arr["momentum.z"]})
     )
     reco_arr["pt"] = reco_p4.pt
     reco_arr["eta"] = reco_p4.eta
@@ -1214,29 +1189,19 @@ def process_one_file(fn, ofn, dataset):
     assert np.all(used_rps == 1)
 
     gps_track = get_particle_feature_matrix(track_to_gp_all, gpdata_cleaned.gen_features, particle_feature_order)
-    gps_track[:, 0] = np.array(
-        [map_neutral_to_charged(map_pdgid_to_candid(p, c)) for p, c in zip(gps_track[:, 0], gps_track[:, 1])]
-    )
+    gps_track[:, 0] = np.array([map_neutral_to_charged(map_pdgid_to_candid(p, c)) for p, c in zip(gps_track[:, 0], gps_track[:, 1])])
     gps_cluster = get_particle_feature_matrix(cluster_to_gp_all, gpdata_cleaned.gen_features, particle_feature_order)
-    gps_cluster[:, 0] = np.array(
-        [map_charged_to_neutral(map_pdgid_to_candid(p, c)) for p, c in zip(gps_cluster[:, 0], gps_cluster[:, 1])]
-    )
+    gps_cluster[:, 0] = np.array([map_charged_to_neutral(map_pdgid_to_candid(p, c)) for p, c in zip(gps_cluster[:, 0], gps_cluster[:, 1])])
     gps_cluster[:, 1] = 0
 
     rps_track = get_particle_feature_matrix(track_to_rp_all, reco_features, particle_feature_order)
-    rps_track[:, 0] = np.array(
-        [map_neutral_to_charged(map_pdgid_to_candid(p, c)) for p, c in zip(rps_track[:, 0], rps_track[:, 1])]
-    )
+    rps_track[:, 0] = np.array([map_neutral_to_charged(map_pdgid_to_candid(p, c)) for p, c in zip(rps_track[:, 0], rps_track[:, 1])])
     rps_cluster = get_particle_feature_matrix(cluster_to_rp_all, reco_features, particle_feature_order)
-    rps_cluster[:, 0] = np.array(
-        [map_charged_to_neutral(map_pdgid_to_candid(p, c)) for p, c in zip(rps_cluster[:, 0], rps_cluster[:, 1])]
-    )
+    rps_cluster[:, 0] = np.array([map_charged_to_neutral(map_pdgid_to_candid(p, c)) for p, c in zip(rps_cluster[:, 0], rps_cluster[:, 1])])
     rps_cluster[:, 1] = 0
 
     # all initial gen/reco particle energy must be reconstructable
-    assert (
-        abs(np.sum(gps_track[:, 6]) + np.sum(gps_cluster[:, 6]) - np.sum(gpdata_cleaned.gen_features["energy"])) < 1e-2
-    )
+    assert abs(np.sum(gps_track[:, 6]) + np.sum(gps_cluster[:, 6]) - np.sum(gpdata_cleaned.gen_features["energy"])) < 1e-2
     assert abs(np.sum(rps_track[:, 6]) + np.sum(rps_cluster[:, 6]) - np.sum(reco_features["energy"])) < 1e-2
 
@@ -1283,9 +1248,7 @@ def process_one_file(fn, ofn, dataset):
     sorted_jet_idx = awkward.argsort(target_jets.pt, axis=-1, ascending=False).to_list()
     target_jets_indices = target_jets_indices.to_list()
     for jet_idx in sorted_jet_idx:
-        jet_constituents = [
-            index_mapping[idx] for idx in target_jets_indices[jet_idx]
-        ]  # map back to constituent index *before* masking
+        jet_constituents = [index_mapping[idx] for idx in target_jets_indices[jet_idx]]  # map back to constituent index *before* masking
         ytarget_constituents[jet_constituents] = jet_idx
     ytarget_track_constituents = ytarget_constituents[: len(ytarget_track)]
     ytarget_cluster_constituents = ytarget_constituents[len(ytarget_track) :]
diff --git a/mlpf/model/PFDataset.py b/mlpf/model/PFDataset.py
index b2bb3f733..07fd68cae 100644
--- a/mlpf/model/PFDataset.py
+++ b/mlpf/model/PFDataset.py
@@ -116,9 +116,7 @@ def __init__(self, data_dir, name, split, num_samples=None, sort=False):
             builder = tfds.builder(name, data_dir=data_dir)
         except Exception:
             _logger.error(
-                "Could not find dataset {} in {}, please check that you have downloaded the correct version of the dataset".format(
-                    name, data_dir
-                )
+                "Could not find dataset {} in {}, please check that you have downloaded the correct version of the dataset".format(name, data_dir)
             )
             sys.exit(1)
         self.ds = TFDSDataSource(builder.as_data_source(split=split), sort=sort)
@@ -157,9 +155,7 @@ def to(self, device, **kwargs):
 class Collater:
     def __init__(self, per_particle_keys_to_get, per_event_keys_to_get, **kwargs):
         super(Collater, self).__init__(**kwargs)
-        self.per_particle_keys_to_get = (
-            per_particle_keys_to_get  # these quantities are a variable-length tensor per each event
-        )
+        self.per_particle_keys_to_get = per_particle_keys_to_get  # these quantities are a variable-length tensor per each event
         self.per_event_keys_to_get = per_event_keys_to_get  # these quantities are one value (scalar) per event
 
     def __call__(self, inputs):
@@ -167,9 +163,7 @@ def __call__(self, inputs):
         ret = {}
         # per-particle quantities need to be padded across events of different size
         for key_to_get in self.per_particle_keys_to_get:
-            ret[key_to_get] = torch.nn.utils.rnn.pad_sequence(
-                [torch.tensor(inp[key_to_get]).to(torch.float32) for inp in inputs], batch_first=True
-            )
+            ret[key_to_get] = torch.nn.utils.rnn.pad_sequence([torch.tensor(inp[key_to_get]).to(torch.float32) for inp in inputs], batch_first=True)
 
         # per-event quantities can be stacked across events
         for key_to_get in self.per_event_keys_to_get:
@@ -266,9 +260,7 @@ def get_interleaved_dataloaders(world_size, rank, config, use_cuda, use_ray):
         loader = torch.utils.data.DataLoader(
             dataset,
             batch_size=batch_size,
-            collate_fn=Collater(
-                ["X", "ytarget", "ytarget_pt_orig", "ytarget_e_orig", "genjets", "targetjets"], ["genmet"]
-            ),
+            collate_fn=Collater(["X", "ytarget", "ytarget_pt_orig", "ytarget_e_orig", "genjets", "targetjets"], ["genmet"]),
             sampler=sampler,
             num_workers=config["num_workers"],
             prefetch_factor=config["prefetch_factor"],
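
Note: every hunk in this patch joins a previously wrapped expression onto one long
line, consistent with raising the formatter's maximum line length; the tool and its
config are not part of this patch. One pattern being reflowed several times above
(in get_genparticles_and_adjacencies and assign_genparticles_to_obj_and_merge) is
packing (row, col, weight) triplets into a scipy COO matrix and reading off the
strongest link per row with .max(axis=1). A minimal standalone sketch of that
pattern with toy numbers; the names below are illustrative, not from the patch:

    import numpy as np
    from scipy.sparse import coo_matrix

    # toy linking triplets, analogous to genparticle_to_trk: (idx_gp, idx_track, weight)
    idx_gp = np.array([0, 0, 2])
    idx_track = np.array([1, 0, 1])
    weight = np.array([0.9, 0.1, 0.5])
    n_gp, n_track = 3, 2

    adj = coo_matrix((weight, (idx_gp, idx_track)), shape=(n_gp, n_track))
    # best-matching track weight per genparticle; gp 1 has no links and stays 0
    gp_to_track = adj.max(axis=1).todense()
    print(gp_to_track)  # matrix([[0.9], [0. ], [0.5]])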
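Similarly, the Collater change in PFDataset.py reflows the call that zero-pads the
variable-length per-particle arrays of each event into one dense
[batch, max_particles, features] tensor via torch.nn.utils.rnn.pad_sequence with
batch_first=True. A small sketch under the same assumptions (toy shapes, with "X"
standing in for one of the per-particle keys):

    import numpy as np
    import torch

    events = [
        {"X": np.random.randn(5, 3)},  # event with 5 particles, 3 features each
        {"X": np.random.randn(2, 3)},  # event with 2 particles
    ]
    batch = torch.nn.utils.rnn.pad_sequence(
        [torch.tensor(ev["X"]).to(torch.float32) for ev in events], batch_first=True
    )
    print(batch.shape)  # torch.Size([2, 5, 3]); the shorter event is zero-padded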