fix: remove command line argument --dataset (#358)

* fix: remove command line argument --dataset. The dataset is specified in the parameter configuration files only.
* remove use of --dataset in scripts
jpata · Oct 24, 2024 · 014a3ee · 014a3ee
1 parent 1d2c5a2
commit 014a3ee
Show file tree

Hide file tree

Showing 10 changed files with 11 additions and 19 deletions.
diff --git a/mlpf/model/training.py b/mlpf/model/training.py
@@ -1059,12 +1059,12 @@ def run(rank, world_size, config, args, outdir, logfile):
             torch.cuda.empty_cache()
 
             # FIXME: import this from a central place
-            if args.dataset == "clic":
+            if config["dataset"] == "clic":
                 import fastjet
 
                 jetdef = fastjet.JetDefinition(fastjet.ee_genkt_algorithm, 0.4, -1.0)
                 jet_ptcut = 5
-            elif args.dataset == "cms":
+            elif config["dataset"] == "cms":
                 import fastjet
 
                 jetdef = fastjet.JetDefinition(fastjet.antikt_algorithm, 0.4)

diff --git a/mlpf/pipeline.py b/mlpf/pipeline.py
@@ -30,14 +30,6 @@
 parser.add_argument("--data-dir", type=str, default=None, help="path to `tensorflow_datasets/`")
 parser.add_argument("--gpus", type=int, default=None, help="to use CPU set to 0; else e.g., 4")
 parser.add_argument("--gpu-batch-multiplier", type=int, default=None, help="Increase batch size per GPU by this constant factor")
-parser.add_argument(
-    "--dataset",
-    type=str,
-    default=None,
-    choices=["clic", "cms"],
-    required=True,
-    help="which dataset?",
-)
 parser.add_argument("--num-workers", type=int, default=None, help="number of processes to load the data")
 parser.add_argument("--prefetch-factor", type=int, default=None, help="number of samples to fetch & prefetch at every call")
 parser.add_argument("--resume-training", type=str, default=None, help="training dir containing the checkpointed training to resume")

diff --git a/scripts/local_test_torch.sh b/scripts/local_test_torch.sh
@@ -31,11 +31,11 @@ tfds build mlpf/heptfds/cms_pf/ttbar --manual_dir ./local_test_data
 mkdir -p experiments
 
 #test transformer with onnx export
-python mlpf/pipeline.py --config parameters/pytorch/pyg-cms.yaml --dataset cms --data-dir ./tensorflow_datasets/ \
+python mlpf/pipeline.py --config parameters/pytorch/pyg-cms.yaml --data-dir ./tensorflow_datasets/ \
   --prefix MLPF_test_ --num-epochs 2 --nvalid 1 --gpus 0 --train --test --make-plots --conv-type attention \
   --export-onnx --pipeline --dtype float32 --attention-type math --num-convs 1
 
 # test Ray Train training
-python mlpf/pipeline.py --config parameters/pytorch/pyg-cms.yaml --dataset cms --data-dir ${PWD}/tensorflow_datasets/ \
+python mlpf/pipeline.py --config parameters/pytorch/pyg-cms.yaml --data-dir ${PWD}/tensorflow_datasets/ \
 	--prefix MLPF_test_ --num-epochs 2 --nvalid 1 --gpus 0 --train --ray-train --ray-cpus 2 --local --conv-type attention \
 	--pipeline --dtype float32 --attention-type math --num-convs 1 --experiments-dir ${PWD}/experiments
diff --git a/scripts/lumi/pytorch-clic-8.sh b/scripts/lumi/pytorch-clic-8.sh
@@ -38,6 +38,6 @@ singularity exec \
     -B /tmp \
     --env LD_LIBRARY_PATH=/opt/rocm/lib/ \
     --env CUDA_VISIBLE_DEVICES=$ROCR_VISIBLE_DEVICES \
-     $IMG python3 mlpf/pipeline.py --dataset clic --gpus 8 \
+     $IMG python3 mlpf/pipeline.py --gpus 8 \
      --data-dir $TFDS_DATA_DIR --config parameters/pytorch/pyg-clic.yaml \
      --train --gpu-batch-multiplier 128 --num-workers 8 --prefetch-factor 100 --checkpoint-freq 1 --conv-type attention --dtype bfloat16 --lr 0.0001 --num-epochs 30
diff --git a/scripts/lumi/pytorch-cms-8.sh b/scripts/lumi/pytorch-cms-8.sh
@@ -38,6 +38,6 @@ singularity exec \
     -B /tmp \
     --env LD_LIBRARY_PATH=/opt/rocm/lib/ \
     --env CUDA_VISIBLE_DEVICES=$ROCR_VISIBLE_DEVICES \
-     $IMG python3 mlpf/pipeline.py --dataset cms --gpus 8 \
+     $IMG python3 mlpf/pipeline.py --gpus 8 \
      --data-dir $TFDS_DATA_DIR --config parameters/pytorch/pyg-cms.yaml \
      --train --gpu-batch-multiplier 6 --num-workers 8 --prefetch-factor 100 --checkpoint-freq 1 --conv-type attention --dtype bfloat16 --lr 0.0001
diff --git a/scripts/tallinn/a100-mig/pytorch-small-eval-clic.sh b/scripts/tallinn/a100-mig/pytorch-small-eval-clic.sh
@@ -11,6 +11,6 @@ WEIGHTS=experiments/pyg-clic_20241001_215132_345408/checkpoints/checkpoint-26-2.
 singularity exec -B /scratch/persistent --nv \
      --env PYTHONPATH=`pwd` \
      --env KERAS_BACKEND=torch \
-     $IMG  python3 mlpf/pipeline.py --dataset clic --gpus 1 \
+     $IMG  python3 mlpf/pipeline.py --gpus 1 \
      --data-dir /scratch/persistent/joosep/tensorflow_datasets --config parameters/pytorch/pyg-clic.yaml \
      --test --make-plots --gpu-batch-multiplier 100 --load $WEIGHTS --dtype bfloat16 --prefetch-factor 10 --num-workers 8 --load $WEIGHTS --ntest 50000
diff --git a/scripts/tallinn/a100-mig/pytorch-small-eval-cms.sh b/scripts/tallinn/a100-mig/pytorch-small-eval-cms.sh
@@ -12,6 +12,6 @@ env
 singularity exec -B /scratch/persistent --nv \
      --env PYTHONPATH=`pwd` \
      --env KERAS_BACKEND=torch \
-     $IMG python mlpf/pipeline.py --dataset cms --gpus 1 \
+     $IMG python mlpf/pipeline.py --gpus 1 \
      --data-dir /scratch/persistent/joosep/tensorflow_datasets --config parameters/pytorch/pyg-cms.yaml \
      --test --make-plots --gpu-batch-multiplier 2 --load $WEIGHTS --ntest 10000 --dtype bfloat16 --num-workers 8 --prefetch-factor 10
diff --git a/scripts/tallinn/a100-mig/pytorch-small.sh b/scripts/tallinn/a100-mig/pytorch-small.sh
@@ -13,6 +13,6 @@ ulimit -n 10000
 singularity exec -B /scratch/persistent --nv \
     --env PYTHONPATH=hep_tfds \
     --env KERAS_BACKEND=torch \
-    $IMG python3 mlpf/pipeline.py --dataset cms --gpus 1 \
+    $IMG python3 mlpf/pipeline.py --gpus 1 \
     --data-dir /scratch/persistent/joosep/tensorflow_datasets --config parameters/pytorch/pyg-cms.yaml \
     --train --test --make-plots --conv-type attention --attention-type flash --gpu-batch-multiplier 1 --num-workers 1 --prefetch-factor 10 --dtype bfloat16 --checkpoint-freq -1 --ntrain 100 --nvalid 100 --ntest 100 --num-epochs 10
diff --git a/scripts/tallinn/a100/pytorch-clic.sh b/scripts/tallinn/a100/pytorch-clic.sh
@@ -11,6 +11,6 @@ ulimit -n 10000
 singularity exec -B /scratch/persistent --nv \
     --env PYTHONPATH=`pwd` \
     --env KERAS_BACKEND=torch \
-    $IMG python3 mlpf/pipeline.py --dataset clic --gpus 1 \
+    $IMG python3 mlpf/pipeline.py --gpus 1 \
     --data-dir /scratch/persistent/joosep/tensorflow_datasets --config parameters/pytorch/pyg-clic.yaml \
     --train --test --make-plots --conv-type attention --num-epochs 20 --gpu-batch-multiplier 256 --num-workers 4 --prefetch-factor 100 --checkpoint-freq 1 --comet --attention-type math --dtype bfloat16
diff --git a/scripts/tallinn/a100/pytorch.sh b/scripts/tallinn/a100/pytorch.sh
@@ -11,7 +11,7 @@ ulimit -n 10000
 singularity exec -B /scratch/persistent --nv \
     --env PYTHONPATH=`pwd` \
     --env KERAS_BACKEND=torch \
-    $IMG python3 mlpf/pipeline.py --dataset cms --gpus 1 \
+    $IMG python3 mlpf/pipeline.py --gpus 1 \
     --data-dir /scratch/persistent/joosep/tensorflow_datasets --config parameters/pytorch/pyg-cms.yaml \
     --train --test --make-plots --conv-type attention \
     --gpu-batch-multiplier 8 --checkpoint-freq 1 --num-workers 8 --prefetch-factor 50 --comet