Skip to content

Commit

Permalink
fix: remove command line argument --dataset (#358)
Browse files Browse the repository at this point in the history
* fix: remove command line argument --dataset

The dataset is specified in the parameter configuration files only.

* remove use of --dataset in scripts
  • Loading branch information
erwulff authored Oct 24, 2024
1 parent 1d2c5a2 commit 014a3ee
Show file tree
Hide file tree
Showing 10 changed files with 11 additions and 19 deletions.
4 changes: 2 additions & 2 deletions mlpf/model/training.py
Original file line number Diff line number Diff line change
Expand Up @@ -1059,12 +1059,12 @@ def run(rank, world_size, config, args, outdir, logfile):
torch.cuda.empty_cache()

# FIXME: import this from a central place
if args.dataset == "clic":
if config["dataset"] == "clic":
import fastjet

jetdef = fastjet.JetDefinition(fastjet.ee_genkt_algorithm, 0.4, -1.0)
jet_ptcut = 5
elif args.dataset == "cms":
elif config["dataset"] == "cms":
import fastjet

jetdef = fastjet.JetDefinition(fastjet.antikt_algorithm, 0.4)
Expand Down
8 changes: 0 additions & 8 deletions mlpf/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,14 +30,6 @@
parser.add_argument("--data-dir", type=str, default=None, help="path to `tensorflow_datasets/`")
parser.add_argument("--gpus", type=int, default=None, help="to use CPU set to 0; else e.g., 4")
parser.add_argument("--gpu-batch-multiplier", type=int, default=None, help="Increase batch size per GPU by this constant factor")
parser.add_argument(
"--dataset",
type=str,
default=None,
choices=["clic", "cms"],
required=True,
help="which dataset?",
)
parser.add_argument("--num-workers", type=int, default=None, help="number of processes to load the data")
parser.add_argument("--prefetch-factor", type=int, default=None, help="number of samples to fetch & prefetch at every call")
parser.add_argument("--resume-training", type=str, default=None, help="training dir containing the checkpointed training to resume")
Expand Down
4 changes: 2 additions & 2 deletions scripts/local_test_torch.sh
Original file line number Diff line number Diff line change
Expand Up @@ -31,11 +31,11 @@ tfds build mlpf/heptfds/cms_pf/ttbar --manual_dir ./local_test_data
mkdir -p experiments

#test transformer with onnx export
python mlpf/pipeline.py --config parameters/pytorch/pyg-cms.yaml --dataset cms --data-dir ./tensorflow_datasets/ \
python mlpf/pipeline.py --config parameters/pytorch/pyg-cms.yaml --data-dir ./tensorflow_datasets/ \
--prefix MLPF_test_ --num-epochs 2 --nvalid 1 --gpus 0 --train --test --make-plots --conv-type attention \
--export-onnx --pipeline --dtype float32 --attention-type math --num-convs 1

# test Ray Train training
python mlpf/pipeline.py --config parameters/pytorch/pyg-cms.yaml --dataset cms --data-dir ${PWD}/tensorflow_datasets/ \
python mlpf/pipeline.py --config parameters/pytorch/pyg-cms.yaml --data-dir ${PWD}/tensorflow_datasets/ \
--prefix MLPF_test_ --num-epochs 2 --nvalid 1 --gpus 0 --train --ray-train --ray-cpus 2 --local --conv-type attention \
--pipeline --dtype float32 --attention-type math --num-convs 1 --experiments-dir ${PWD}/experiments
2 changes: 1 addition & 1 deletion scripts/lumi/pytorch-clic-8.sh
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,6 @@ singularity exec \
-B /tmp \
--env LD_LIBRARY_PATH=/opt/rocm/lib/ \
--env CUDA_VISIBLE_DEVICES=$ROCR_VISIBLE_DEVICES \
$IMG python3 mlpf/pipeline.py --dataset clic --gpus 8 \
$IMG python3 mlpf/pipeline.py --gpus 8 \
--data-dir $TFDS_DATA_DIR --config parameters/pytorch/pyg-clic.yaml \
--train --gpu-batch-multiplier 128 --num-workers 8 --prefetch-factor 100 --checkpoint-freq 1 --conv-type attention --dtype bfloat16 --lr 0.0001 --num-epochs 30
2 changes: 1 addition & 1 deletion scripts/lumi/pytorch-cms-8.sh
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,6 @@ singularity exec \
-B /tmp \
--env LD_LIBRARY_PATH=/opt/rocm/lib/ \
--env CUDA_VISIBLE_DEVICES=$ROCR_VISIBLE_DEVICES \
$IMG python3 mlpf/pipeline.py --dataset cms --gpus 8 \
$IMG python3 mlpf/pipeline.py --gpus 8 \
--data-dir $TFDS_DATA_DIR --config parameters/pytorch/pyg-cms.yaml \
--train --gpu-batch-multiplier 6 --num-workers 8 --prefetch-factor 100 --checkpoint-freq 1 --conv-type attention --dtype bfloat16 --lr 0.0001
2 changes: 1 addition & 1 deletion scripts/tallinn/a100-mig/pytorch-small-eval-clic.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,6 @@ WEIGHTS=experiments/pyg-clic_20241001_215132_345408/checkpoints/checkpoint-26-2.
singularity exec -B /scratch/persistent --nv \
--env PYTHONPATH=`pwd` \
--env KERAS_BACKEND=torch \
$IMG python3 mlpf/pipeline.py --dataset clic --gpus 1 \
$IMG python3 mlpf/pipeline.py --gpus 1 \
--data-dir /scratch/persistent/joosep/tensorflow_datasets --config parameters/pytorch/pyg-clic.yaml \
--test --make-plots --gpu-batch-multiplier 100 --load $WEIGHTS --dtype bfloat16 --prefetch-factor 10 --num-workers 8 --load $WEIGHTS --ntest 50000
2 changes: 1 addition & 1 deletion scripts/tallinn/a100-mig/pytorch-small-eval-cms.sh
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,6 @@ env
singularity exec -B /scratch/persistent --nv \
--env PYTHONPATH=`pwd` \
--env KERAS_BACKEND=torch \
$IMG python mlpf/pipeline.py --dataset cms --gpus 1 \
$IMG python mlpf/pipeline.py --gpus 1 \
--data-dir /scratch/persistent/joosep/tensorflow_datasets --config parameters/pytorch/pyg-cms.yaml \
--test --make-plots --gpu-batch-multiplier 2 --load $WEIGHTS --ntest 10000 --dtype bfloat16 --num-workers 8 --prefetch-factor 10
2 changes: 1 addition & 1 deletion scripts/tallinn/a100-mig/pytorch-small.sh
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,6 @@ ulimit -n 10000
singularity exec -B /scratch/persistent --nv \
--env PYTHONPATH=hep_tfds \
--env KERAS_BACKEND=torch \
$IMG python3 mlpf/pipeline.py --dataset cms --gpus 1 \
$IMG python3 mlpf/pipeline.py --gpus 1 \
--data-dir /scratch/persistent/joosep/tensorflow_datasets --config parameters/pytorch/pyg-cms.yaml \
--train --test --make-plots --conv-type attention --attention-type flash --gpu-batch-multiplier 1 --num-workers 1 --prefetch-factor 10 --dtype bfloat16 --checkpoint-freq -1 --ntrain 100 --nvalid 100 --ntest 100 --num-epochs 10
2 changes: 1 addition & 1 deletion scripts/tallinn/a100/pytorch-clic.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,6 @@ ulimit -n 10000
singularity exec -B /scratch/persistent --nv \
--env PYTHONPATH=`pwd` \
--env KERAS_BACKEND=torch \
$IMG python3 mlpf/pipeline.py --dataset clic --gpus 1 \
$IMG python3 mlpf/pipeline.py --gpus 1 \
--data-dir /scratch/persistent/joosep/tensorflow_datasets --config parameters/pytorch/pyg-clic.yaml \
--train --test --make-plots --conv-type attention --num-epochs 20 --gpu-batch-multiplier 256 --num-workers 4 --prefetch-factor 100 --checkpoint-freq 1 --comet --attention-type math --dtype bfloat16
2 changes: 1 addition & 1 deletion scripts/tallinn/a100/pytorch.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ ulimit -n 10000
singularity exec -B /scratch/persistent --nv \
--env PYTHONPATH=`pwd` \
--env KERAS_BACKEND=torch \
$IMG python3 mlpf/pipeline.py --dataset cms --gpus 1 \
$IMG python3 mlpf/pipeline.py --gpus 1 \
--data-dir /scratch/persistent/joosep/tensorflow_datasets --config parameters/pytorch/pyg-cms.yaml \
--train --test --make-plots --conv-type attention \
--gpu-batch-multiplier 8 --checkpoint-freq 1 --num-workers 8 --prefetch-factor 50 --comet

0 comments on commit 014a3ee

Please sign in to comment.