Skip to content

Commit

Permalink
Increase XRootD timeout
Browse files Browse the repository at this point in the history
  • Loading branch information
piperov committed Mar 8, 2024
1 parent e803957 commit d3d9272
Show file tree
Hide file tree
Showing 3 changed files with 132 additions and 5 deletions.
4 changes: 2 additions & 2 deletions af_benchmark/uproot_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ def __init__(self, config):
self.config = config

def open_nanoaod(self, file_path, **kwargs):
    """Open a ROOT file with uproot and return its "Events" object.

    The file is opened exactly once. Opening it a second time (first
    without a timeout, then with one) doubles the I/O and lets the
    first, default-timeout open fail before the longer timeout is ever
    applied.

    Args:
        file_path: Path or URL handed to ``uproot.open``.
        **kwargs: Optional settings. Only ``timeout`` is consulted; it is
            forwarded to ``uproot.open`` and defaults to 300. Any other
            keyword is ignored, as before.

    Returns:
        The object stored under the ``"Events"`` key of the opened file.
    """
    # Raised timeout for slow remote reads; callers may override it.
    # NOTE(review): uproot documents this value as seconds -- confirm.
    timeout = kwargs.get("timeout", 300)
    tree = uproot.open(file_path, timeout=timeout)["Events"]
    return tree

def get_column_list(self, file):
Expand Down Expand Up @@ -120,4 +120,4 @@ def run_operation(self, column_data, **kwargs):
np.sum(data_in_memory)




131 changes: 129 additions & 2 deletions notebooks/3.2_Data-locations.ipynb
Original file line number Diff line number Diff line change
@@ -1,12 +1,139 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "5d8f1484-dbb2-4711-8db9-5631d5e8cf66",
"metadata": {},
"source": [
"In this benchmark, we compare throughput (MB/s) when reading a single ROOT file from different geographical locations, using different numbers of local workers."
]
},
{
"cell_type": "markdown",
"id": "e7172688-ce9d-4cbe-b867-fbc2dcaf5b21",
"metadata": {},
"source": [
"### Purdue EOS, FUSE mounted"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a9896f9f-8e9c-4de9-a48b-52c6967ce406",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"env: XRD_REQUESTTIMEOUT=300\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"2024-03-08 09:13:04,747 - distributed.preloading - INFO - Creating preload: import sys; sys.path.insert(0, '/home/spiperov/purdue-af-demo-may2023/');\n",
"2024-03-08 09:13:04,748 - distributed.utils - INFO - Reload module tmpah7fy39u from .py file\n",
"2024-03-08 09:13:04,748 - distributed.preloading - INFO - Import preload module: /tmp/tmpah7fy39u.py\n",
"2024-03-08 09:13:04,753 - distributed.preloading - INFO - Creating preload: import sys; sys.path.insert(0, '/home/spiperov/purdue-af-demo-may2023/');\n",
"2024-03-08 09:13:04,753 - distributed.utils - INFO - Reload module tmpij2gz_bk from .py file\n",
"2024-03-08 09:13:04,754 - distributed.preloading - INFO - Import preload module: /tmp/tmpij2gz_bk.py\n",
"2024-03-08 09:13:04,774 - distributed.preloading - INFO - Creating preload: import sys; sys.path.insert(0, '/home/spiperov/purdue-af-demo-may2023/');\n",
"2024-03-08 09:13:04,775 - distributed.utils - INFO - Reload module tmp597z0edi from .py file\n",
"2024-03-08 09:13:04,775 - distributed.preloading - INFO - Import preload module: /tmp/tmp597z0edi.py\n",
"2024-03-08 09:13:04,779 - distributed.preloading - INFO - Creating preload: import sys; sys.path.insert(0, '/home/spiperov/purdue-af-demo-may2023/');\n",
"2024-03-08 09:13:04,780 - distributed.utils - INFO - Reload module tmp7jbpsq0g from .py file\n",
"2024-03-08 09:13:04,780 - distributed.preloading - INFO - Import preload module: /tmp/tmp7jbpsq0g.py\n",
"2024-03-08 09:13:04,785 - distributed.preloading - INFO - Creating preload: import sys; sys.path.insert(0, '/home/spiperov/purdue-af-demo-may2023/');\n",
"2024-03-08 09:13:04,785 - distributed.utils - INFO - Reload module tmpm0w8d3hh from .py file\n",
"2024-03-08 09:13:04,785 - distributed.preloading - INFO - Creating preload: import sys; sys.path.insert(0, '/home/spiperov/purdue-af-demo-may2023/');\n",
"2024-03-08 09:13:04,786 - distributed.preloading - INFO - Import preload module: /tmp/tmpm0w8d3hh.py\n",
"2024-03-08 09:13:04,786 - distributed.utils - INFO - Reload module tmpj_nz70zi from .py file\n",
"2024-03-08 09:13:04,786 - distributed.preloading - INFO - Creating preload: import sys; sys.path.insert(0, '/home/spiperov/purdue-af-demo-may2023/');\n",
"2024-03-08 09:13:04,787 - distributed.preloading - INFO - Import preload module: /tmp/tmpj_nz70zi.py\n",
"2024-03-08 09:13:04,787 - distributed.utils - INFO - Reload module tmphqb2mn4j from .py file\n",
"2024-03-08 09:13:04,788 - distributed.preloading - INFO - Import preload module: /tmp/tmphqb2mn4j.py\n",
"2024-03-08 09:13:04,792 - distributed.preloading - INFO - Creating preload: import sys; sys.path.insert(0, '/home/spiperov/purdue-af-demo-may2023/');\n",
"2024-03-08 09:13:04,792 - distributed.utils - INFO - Reload module tmpv_uqobai from .py file\n",
"2024-03-08 09:13:04,793 - distributed.preloading - INFO - Import preload module: /tmp/tmpv_uqobai.py\n",
" 25%|██▌ | 1/4 [00:31<01:35, 31.81s/it]2024-03-08 09:15:49,222 - distributed.utils_perf - WARNING - full garbage collections took 38% CPU time recently (threshold: 10%)\n",
" 50%|█████ | 2/4 [02:45<03:03, 91.50s/it]2024-03-08 09:15:52,457 - distributed.preloading - INFO - Creating preload: import sys; sys.path.insert(0, '/home/spiperov/purdue-af-demo-may2023/');\n",
"2024-03-08 09:15:52,457 - distributed.utils - INFO - Reload module tmph9vv4rme from .py file\n",
"2024-03-08 09:15:52,458 - distributed.preloading - INFO - Import preload module: /tmp/tmph9vv4rme.py\n"
]
}
],
"source": [
"import os, sys\n",
"sys.path.append(os.getcwd()+\"/../af_benchmark\")\n",
"from benchmark import Benchmark, run_benchmark\n",
"import numpy as np\n",
"import tqdm\n",
"import time\n",
"from dask_gateway import Gateway\n",
"%env XRD_REQUESTTIMEOUT 300\n",
"\n",
"nworkers_options = [1,2,4,8,16]\n",
"col_num_options = [30,120,240,300]\n",
"# nworkers_options = [8]\n",
"# col_num_options = [1200]\n",
"\n",
"b = Benchmark(\"config_3.2.yaml\")\n",
"for nworkers in nworkers_options:\n",
" for col_num in tqdm.tqdm(col_num_options):\n",
" b.config[\"executor\"][\"n_workers\"] = int(nworkers)\n",
" b.config[\"processor\"][\"columns\"] = int(col_num)\n",
" b.reset(keep_cluster=True, reset_workers=True)\n",
" b.run()\n",
" b.update_report()\n",
"\n",
"report = b.report_df"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "505cf3c2-0ac1-4f4d-bb94-017930ea624e",
"metadata": {},
"outputs": [],
"source": [
"import matplotlib.pyplot as plt\n",
"\n",
"report[\"MB/s (compressed)\"] = report.compressed_bytes/report.process_columns/(1024*1024)\n",
"\n",
"report_by_ncols = report.groupby('n_columns_read')\n",
"\n",
"plt.figure(figsize=(8, 6))\n",
"for ncols, group in report_by_ncols:\n",
" plt.plot(group.n_workers, group[\"MB/s (compressed)\"], label=f\"{ncols} columns\")\n",
"\n",
"plt.xlabel('# workers')\n",
"plt.ylabel('MB/s')\n",
"plt.legend()\n",
"plt.xlim(0, report.n_workers.max()+5)\n",
"plt.ylim(0, report['MB/s (compressed)'].max() + 5)\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7f6ca63b-47ae-4579-bacc-0fc9c69b8e65",
"id": "477bae91-17da-4d96-bad3-f1092be55400",
"metadata": {},
"outputs": [],
"source": []
"source": [
"plt.figure(figsize=(8, 6))\n",
"for ncols, group in report_by_ncols:\n",
" plt.plot(group.n_workers, group.process_columns, label=f\"{ncols} columns\")\n",
"\n",
"plt.xlabel('# workers')\n",
"plt.ylabel('Time (s)')\n",
"plt.legend()\n",
"plt.xlim(0, report.n_workers.max()+5)\n",
"plt.ylim(0, report.run.max() + 5)\n",
"plt.show()"
]
}
],
"metadata": {
Expand Down
2 changes: 1 addition & 1 deletion notebooks/dask-gateway-setup.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
" worker_memory = 4,\n",
" env = {\n",
" \"WORKDIR\": \"/depot/cms/users/dkondra/af-benchmark/af_benchmark/\",\n",
" \"X509_USER_PROXY\": \"/depot/cms/users/dkondra/x509up_u616617\",\n",
" \"X509_USER_PROXY\": \"/home/spiperov/x509up_u638764\",\n",
" }\n",
")\n",
"\n",
Expand Down

0 comments on commit d3d9272

Please sign in to comment.