
Commit

filetag for submissions
rkansal47 committed Feb 20, 2025
1 parent 85aabad commit 796ed05
Showing 4 changed files with 9 additions and 8 deletions.
1 change: 0 additions & 1 deletion condor/check_jobs.py
@@ -21,7 +21,6 @@
"--processor",
help="which processor",
type=str,
choices=["trigger_boosted", "skimmer", "matching"],
required=True,
)

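For context, a minimal illustrative sketch (not from the repository) of what dropping the `choices` constraint means: argparse no longer rejects processor names outside the old whitelist, so any string is accepted at parse time and must be validated downstream if needed.

```python
import argparse

parser = argparse.ArgumentParser()
# with `choices` removed, argparse accepts any processor name
parser.add_argument("--processor", help="which processor", type=str, required=True)

args = parser.parse_args(["--processor", "my_new_processor"])  # hypothetical name
print(args.processor)  # my_new_processor
```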
2 changes: 1 addition & 1 deletion condor/submit.templ.sh
@@ -43,7 +43,7 @@ cd ..

# run code
# pip install --user onnxruntime
python -u -W ignore $script --year $year --starti $starti --endi $endi --samples $sample --subsamples $subsample --processor $processor --maxchunks $maxchunks --chunksize $chunksize ${save_root} ${save_systematics} --nano-version ${nano_version} $processor_args
python -u -W ignore $script --year $year --starti $starti --endi $endi --file-tag $filetag --samples $sample --subsamples $subsample --processor $processor --maxchunks $maxchunks --chunksize $chunksize ${save_root} ${save_systematics} --nano-version ${nano_version} $processor_args

#move output to t2s
for t2_prefix in ${t2_prefixes}
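A hedged sketch of how the new `$filetag` placeholder might be filled per job, assuming `string.Template`-style substitution from the job dictionary shown in `submit_utils.py` below (the repository's actual templating mechanism may differ; all values here are hypothetical).

```python
from string import Template

# toy template line mirroring the change above
templ = "python -u -W ignore $script --year $year --file-tag $filetag --processor $processor"

job = {  # keys mirror the dict built in submit_utils.submit(); values are made up
    "script": "run.py",
    "year": "2022",
    "filetag": 0,  # the job number, reused as the output file tag
    "processor": "skimmer",
}

print(Template(templ).safe_substitute(job))
# python -u -W ignore run.py --year 2022 --file-tag 0 --processor skimmer
```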
13 changes: 7 additions & 6 deletions src/boostedhh/run_utils.py
@@ -101,6 +101,7 @@ def parse_common_run_args(parser):
help="# of outputs to combine into a single output if saving .parquet or .root files",
)
parser.add_argument("--yaml", default=None, help="yaml file", type=str)
parser.add_argument("--file-tag", default=None, help="optional output file tag", type=str)


def parse_common_hh_args(parser):
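As a quick illustration (not repository code), the new flag parses into `args.file_tag` (argparse maps the dash to an underscore) and remains `None` when omitted:

```python
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--file-tag", default=None, help="optional output file tag", type=str)

print(parser.parse_args([]).file_tag)                   # None
print(parser.parse_args(["--file-tag", "0"]).file_tag)  # 0  (parsed as a string)
```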
@@ -232,7 +233,7 @@
):
"""
Run processor without fancy dask (outputs then need to be accumulated manually)
batch_size (int): used to combine a ``batch_size`` number of outputs into one parquet / root
"""
add_mixins(nanoevents) # update nanoevents schema
@@ -287,19 +288,19 @@ def run(

with Path(f"{outdir}/{filetag}.pkl").open("wb") as f:
pickle.dump(out, f)

if save_parquet or save_root:
import pandas as pd
import pyarrow as pa
import pyarrow.parquet as pq

# Get all parquet files
path = Path(local_parquet_dir)
parquet_files = list(path.glob("*.parquet"))

num_batches = int(np.ceil(len(parquet_files) / batch_size))
Path(f"num_batches_{filetag}_{num_batches}.txt").touch()

# need to combine all the files from these processors before transferring to EOS
# otherwise it will complain about too many small files
for i in range(num_batches):
@@ -308,7 +309,7 @@
print(batch)
print([pd.read_parquet(f) for f in batch])
pddf = pd.concat([pd.read_parquet(f) for f in batch])

if save_parquet:
# need to write with pyarrow as pd.to_parquet doesn't support different types in
# multi-index column names
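For readers skimming the diff, here is a self-contained sketch of the batching logic in `run()`: glob the per-chunk parquet outputs, split them into `batch_size`-sized groups, and write each combined group with pyarrow. The directory and output file names are assumptions for illustration, not the repository's exact values.

```python
from pathlib import Path

import numpy as np
import pandas as pd
import pyarrow as pa
import pyarrow.parquet as pq

local_parquet_dir = "outparquet"  # hypothetical directory holding per-chunk outputs
filetag = "0"                     # e.g. the job number passed via --file-tag
batch_size = 10

parquet_files = sorted(Path(local_parquet_dir).glob("*.parquet"))
num_batches = int(np.ceil(len(parquet_files) / batch_size))

# marker file recording how many combined outputs to expect (name as in run())
Path(f"num_batches_{filetag}_{num_batches}.txt").touch()

for i in range(num_batches):
    batch = parquet_files[i * batch_size : (i + 1) * batch_size]
    pddf = pd.concat([pd.read_parquet(f) for f in batch])
    # write with pyarrow: per the comment above, pd.to_parquet does not support
    # different types in multi-index column names
    table = pa.Table.from_pandas(pddf)
    pq.write_table(table, f"{local_parquet_dir}/out_{filetag}_{i}.parquet")  # hypothetical name
```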
1 change: 1 addition & 0 deletions src/boostedhh/submit_utils.py
@@ -222,6 +222,7 @@ def submit(
"chunksize": args.chunksize,
"t2_prefixes": " ".join(t2_prefixes),
"outdir": sample_dir,
"filetag": j,
"jobnum": j,
"save_root": ("--save-root" if args.save_root else "--no-save-root"),
"nano_version": args.nano_version,
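Since `filetag` is set to the job number `j`, each job's outputs get unique names. A small illustrative sketch (the `.pkl` pattern comes from the `run_utils.py` change above; the directory is hypothetical):

```python
outdir = "2022/GluGlutoHHto4B"  # hypothetical per-sample output directory

for j in range(3):  # three hypothetical jobs
    filetag = j
    print(f"{outdir}/{filetag}.pkl")  # pickled processor output per job
# 2022/GluGlutoHHto4B/0.pkl
# 2022/GluGlutoHHto4B/1.pkl
# 2022/GluGlutoHHto4B/2.pkl
```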
