diff --git a/notebooks/2.1_Single-file.ipynb b/notebooks/2.1_Single-file.ipynb index e2cff51..2cca93e 100644 --- a/notebooks/2.1_Single-file.ipynb +++ b/notebooks/2.1_Single-file.ipynb @@ -65,15 +65,15 @@ "}\n", "\n", "column_presets = {\n", - " # \"100pct\": {\n", - " # # the bechmark will limit this to actual total number of columns\n", - " # \"method\": \"n_columns\",\n", - " # \"values\": 100000\n", - " # },\n", - " # \"50pct\": {\n", - " # \"method\": \"collections\",\n", - " # \"values\": [\"Jet\", \"Photon\", \"Tau\", \"Electron\", \"Muon\"]\n", - " # },\n", + " \"100pct\": {\n", + " # the benchmark will limit this to actual total number of columns\n", + " \"method\": \"n_columns\",\n", + " \"values\": 100000\n", + " },\n", + " \"50pct\": {\n", + " \"method\": \"collections\",\n", + " \"values\": [\"Jet\", \"Photon\", \"Tau\", \"Electron\", \"Muon\"]\n", + " },\n", " \"10pct\": {\n", " \"method\": \"collections\",\n", " \"values\": [\"Muon\"]\n", @@ -101,7 +101,7 @@ " recreate_dir(save_dir)\n", "\n", " iconf = 0\n", - " repeat = 3\n", + " repeat = 5\n", "\n", " for f_label, file_loc in file_locations.items():\n", " for c_label, column_setup in column_presets.items():\n", @@ -139,31 +139,13 @@ "source": [ "# warning: all YAML files will be deleted from config directory before proceeding\n", "config_path = \"./configs_2.1\"\n", - "output_path = \"./outputs_2.1\"" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "8633cf12-d9d9-4201-8969-e40868ee2e65", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Directory ./configs_2.1 already exists, will clean all files from it.\n", - "Saved 30 config files to ./configs_2.1\n" - ] - } - ], - "source": [ + "output_path = \"./outputs_2.1\"\n", "generate_configs(config_path)" ] }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "id": "d0ed6b4f-1ce8-4731-87e7-57da3b4abced", "metadata": {}, "outputs": [ @@ -178,7 +160,7 @@ "name": 
"stderr", "output_type": "stream", "text": [ - "100%|██████████| 30/30 [17:30<00:00, 35.01s/it]\n" + " 1%| | 1/100 [00:19<31:37, 19.16s/it]" ] } ], @@ -233,260 +215,107 @@ " \n", " 0\n", " 1\n", - " 50\n", - " 154321550\n", + " 1249\n", + " 3854952319\n", " True\n", " 0\n", " sequential\n", " 1\n", - " 265594188\n", - " 1129031467\n", - " 30.459931\n", - " 0.030563\n", - " 24.061388\n", - " 10pct\n", + " 2048077850\n", + " 11568311796\n", + " 246.806087\n", + " 0.494070\n", + " 166.181469\n", + " 100pct\n", " depot\n", " \n", " \n", " 1\n", " 1\n", - " 50\n", - " 154321550\n", + " 1249\n", + " 3854952319\n", " True\n", " 0\n", " sequential\n", " 1\n", - " 265594188\n", - " 1129031467\n", - " 30.506349\n", - " 0.031068\n", - " 24.128228\n", - " 10pct\n", + " 2048077850\n", + " 11568311796\n", + " 242.890458\n", + " 0.494278\n", + " 162.216860\n", + " 100pct\n", " depot\n", " \n", " \n", " 2\n", " 1\n", - " 50\n", - " 154321550\n", + " 1249\n", + " 3854952319\n", " True\n", " 0\n", " sequential\n", " 1\n", - " 265594188\n", - " 1129031467\n", - " 30.462117\n", - " 0.030792\n", - " 24.097522\n", - " 10pct\n", + " 2048077850\n", + " 11568311796\n", + " 242.882858\n", + " 0.492016\n", + " 162.148665\n", + " 100pct\n", " depot\n", " \n", " \n", " 3\n", " 1\n", - " 50\n", - " 154321550\n", + " 1249\n", + " 3854952319\n", " True\n", " 0\n", " sequential\n", " 1\n", - " 265594188\n", - " 1129031467\n", - " 30.513377\n", - " 0.030538\n", - " 24.048350\n", - " 10pct\n", - " eos_fuse\n", + " 2048077850\n", + " 11568311796\n", + " 242.313256\n", + " 0.490587\n", + " 161.856776\n", + " 100pct\n", + " depot\n", " \n", " \n", " 4\n", " 1\n", - " 50\n", - " 154321550\n", + " 1249\n", + " 3854952319\n", " True\n", " 0\n", " sequential\n", " 1\n", - " 265594188\n", - " 1129031467\n", - " 30.442081\n", - " 0.030833\n", - " 24.080730\n", - " 10pct\n", - " eos_fuse\n", - " \n", - " \n", - " 5\n", - " 1\n", - " 50\n", - " 154321550\n", - " True\n", - " 0\n", - " sequential\n", - " 1\n", 
- " 265594188\n", - " 1129031467\n", - " 30.533058\n", - " 0.030703\n", - " 24.076658\n", - " 10pct\n", - " eos_fuse\n", - " \n", - " \n", - " 6\n", - " 1\n", - " 50\n", - " 154321550\n", - " True\n", - " 0\n", - " sequential\n", - " 1\n", - " 265594188\n", - " 1129031467\n", - " 34.893134\n", - " 0.030747\n", - " 24.201373\n", - " 10pct\n", - " work\n", - " \n", - " \n", - " 7\n", - " 1\n", - " 50\n", - " 154321550\n", - " True\n", - " 0\n", - " sequential\n", - " 1\n", - " 265594188\n", - " 1129031467\n", - " 30.539464\n", - " 0.030596\n", - " 24.140102\n", - " 10pct\n", - " work\n", - " \n", - " \n", - " 8\n", - " 1\n", - " 50\n", - " 154321550\n", - " True\n", - " 0\n", - " sequential\n", - " 1\n", - " 265594188\n", - " 1129031467\n", - " 30.569113\n", - " 0.030507\n", - " 24.164185\n", - " 10pct\n", - " work\n", - " \n", - " \n", - " 9\n", - " 1\n", - " 50\n", - " 154321550\n", - " True\n", - " 0\n", - " sequential\n", - " 1\n", - " 265594188\n", - " 1129031467\n", - " 32.927919\n", - " 0.320015\n", - " 24.198597\n", - " 10pct\n", - " xcache\n", - " \n", - " \n", - " 10\n", - " 1\n", - " 50\n", - " 154321550\n", - " True\n", - " 0\n", - " sequential\n", - " 1\n", - " 265594188\n", - " 1129031467\n", - " 33.032840\n", - " 0.316362\n", - " 24.168849\n", - " 10pct\n", - " xcache\n", - " \n", - " \n", - " 11\n", - " 1\n", - " 50\n", - " 154321550\n", - " True\n", - " 0\n", - " sequential\n", - " 1\n", - " 265594188\n", - " 1129031467\n", - " 33.051579\n", - " 0.582236\n", - " 24.132116\n", - " 10pct\n", - " xcache\n", - " \n", - " \n", - " 12\n", - " 1\n", - " 50\n", - " 154321550\n", - " True\n", - " 0\n", - " sequential\n", - " 1\n", - " 265594188\n", - " 1129031467\n", - " 104.772271\n", - " 72.213381\n", - " 24.164070\n", - " 10pct\n", - " xrootd\n", + " 2048077850\n", + " 11568311796\n", + " 243.378389\n", + " 0.494578\n", + " 162.048801\n", + " 100pct\n", + " depot\n", " \n", " \n", - " 13\n", - " 1\n", - " 50\n", - " 154321550\n", - " True\n", - " 0\n", - " 
sequential\n", - " 1\n", - " 265594188\n", - " 1129031467\n", - " 62.811575\n", - " 30.291163\n", - " 24.132109\n", - " 10pct\n", - " xrootd\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", " \n", " \n", - " 14\n", - " 1\n", - " 50\n", - " 154321550\n", - " True\n", - " 0\n", - " sequential\n", - " 1\n", - " 265594188\n", - " 1129031467\n", - " 55.180227\n", - " 23.014281\n", - " 24.131685\n", - " 10pct\n", - " xrootd\n", - " \n", - " \n", - " 15\n", + " 95\n", " 1\n", " 20\n", " 61728620\n", @@ -496,14 +325,14 @@ " 1\n", " 175488912\n", " 521753090\n", - " 16.648896\n", - " 0.010548\n", - " 13.257322\n", + " 31.169582\n", + " 13.705979\n", + " 13.310637\n", " 5pct\n", - " depot\n", + " xrootd\n", " \n", " \n", - " 16\n", + " 96\n", " 1\n", " 20\n", " 61728620\n", @@ -513,14 +342,14 @@ " 1\n", " 175488912\n", " 521753090\n", - " 16.239849\n", - " 0.010682\n", - " 13.230332\n", + " 69.664148\n", + " 52.051985\n", + " 13.323399\n", " 5pct\n", - " depot\n", + " xrootd\n", " \n", " \n", - " 17\n", + " 97\n", " 1\n", " 20\n", " 61728620\n", @@ -530,14 +359,14 @@ " 1\n", " 175488912\n", " 521753090\n", - " 16.274737\n", - " 0.010654\n", - " 13.235920\n", + " 67.827847\n", + " 50.425657\n", + " 13.305850\n", " 5pct\n", - " depot\n", + " xrootd\n", " \n", " \n", - " 18\n", + " 98\n", " 1\n", " 20\n", " 61728620\n", @@ -547,14 +376,14 @@ " 1\n", " 175488912\n", " 521753090\n", - " 16.194398\n", - " 0.010595\n", - " 13.220522\n", + " 65.893562\n", + " 48.255377\n", + " 13.326851\n", " 5pct\n", - " eos_fuse\n", + " xrootd\n", " \n", " \n", - " 19\n", + " 99\n", " 1\n", " 20\n", " 61728620\n", @@ -564,317 +393,548 @@ " 1\n", " 175488912\n", " 521753090\n", - " 16.217556\n", - " 0.010699\n", - " 13.240024\n", + " 67.729488\n", + " 50.291131\n", + " 13.313098\n", " 5pct\n", - " eos_fuse\n", + " xrootd\n", + " \n", + " \n", + "\n", + "

100 rows × 14 columns

\n", + "" + ], + "text/plain": [ + " n_files n_columns_read n_events loaded_columns \\\n", + "0 1 1249 3854952319 True \n", + "1 1 1249 3854952319 True \n", + "2 1 1249 3854952319 True \n", + "3 1 1249 3854952319 True \n", + "4 1 1249 3854952319 True \n", + ".. ... ... ... ... \n", + "95 1 20 61728620 True \n", + "96 1 20 61728620 True \n", + "97 1 20 61728620 True \n", + "98 1 20 61728620 True \n", + "99 1 20 61728620 True \n", + "\n", + " worker_operation_time executor n_workers compressed_bytes \\\n", + "0 0 sequential 1 2048077850 \n", + "1 0 sequential 1 2048077850 \n", + "2 0 sequential 1 2048077850 \n", + "3 0 sequential 1 2048077850 \n", + "4 0 sequential 1 2048077850 \n", + ".. ... ... ... ... \n", + "95 0 sequential 1 175488912 \n", + "96 0 sequential 1 175488912 \n", + "97 0 sequential 1 175488912 \n", + "98 0 sequential 1 175488912 \n", + "99 0 sequential 1 175488912 \n", + "\n", + " uncompressed_bytes time:run_processor time:wait time:decompress \\\n", + "0 11568311796 246.806087 0.494070 166.181469 \n", + "1 11568311796 242.890458 0.494278 162.216860 \n", + "2 11568311796 242.882858 0.492016 162.148665 \n", + "3 11568311796 242.313256 0.490587 161.856776 \n", + "4 11568311796 243.378389 0.494578 162.048801 \n", + ".. ... ... ... ... \n", + "95 521753090 31.169582 13.705979 13.310637 \n", + "96 521753090 69.664148 52.051985 13.323399 \n", + "97 521753090 67.827847 50.425657 13.305850 \n", + "98 521753090 65.893562 48.255377 13.326851 \n", + "99 521753090 67.729488 50.291131 13.313098 \n", + "\n", + " column_setup file_location \n", + "0 100pct depot \n", + "1 100pct depot \n", + "2 100pct depot \n", + "3 100pct depot \n", + "4 100pct depot \n", + ".. ... ... 
\n", + "95 5pct xrootd \n", + "96 5pct xrootd \n", + "97 5pct xrootd \n", + "98 5pct xrootd \n", + "99 5pct xrootd \n", + "\n", + "[100 rows x 14 columns]" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "report.sort_values(by=['column_setup', 'file_location']).reset_index(drop=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "fbd5b873-2df4-40ee-bc5f-042dd7c640ec", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/tmp/ipykernel_186141/1348247025.py:1: FutureWarning: The default value of numeric_only in DataFrameGroupBy.mean is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function.\n", + " report.groupby(['column_setup', 'file_location']).agg('mean')\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", + " \n", + " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", 
" \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", "
n_filesn_columns_readn_eventsloaded_columnsworker_operation_timen_workerscompressed_bytesuncompressed_bytestime:run_processortime:waittime:decompress
2012061728620True0sequential117548891252175309016.1414980.01058213.2362675pcteos_fusecolumn_setupfile_location
2112061728620True0sequential117548891252175309016.2056290.01078013.2846105pctwork100pctdepot1.01249.03.854952e+091.00.01.02.048078e+091.156831e+10243.6542090.493106162.890514
2212061728620True0sequential117548891252175309016.1788970.01070413.2673685pctworkeos_fuse1.01249.03.854952e+091.00.01.02.048078e+091.156831e+10244.3840490.492532163.389472
2312061728620True0sequential117548891252175309016.3418410.01081813.3273475pctworkwork1.01249.03.854952e+091.00.01.02.048078e+091.156831e+10244.7295980.492726163.492540
2412061728620True0sequential117548891252175309017.0736310.22674413.2673855pctxcachexcache1.01249.03.854952e+091.00.01.02.048078e+091.156831e+10295.8946267.241993162.952794
2512061728620True0sequential117548891252175309017.1487250.20183013.2626595pctxcachexrootd1.01249.03.854952e+091.00.01.02.048078e+091.156831e+10396.859249112.672174162.690041
2612061728620True0sequential117548891252175309017.1893370.21666513.2827495pctxcache10pctdepot1.050.01.543216e+081.00.01.02.655942e+081.129031e+0930.9862810.03147024.177480
2712061728620True0sequential117548891252175309069.02672052.12725713.2727055pctxrootdeos_fuse1.050.01.543216e+081.00.01.02.655942e+081.129031e+0930.9937360.03142324.189793
2812061728620True0sequential117548891252175309060.01388943.19848413.2592635pctxrootdwork1.050.01.543216e+081.00.01.02.655942e+081.129031e+0931.1723220.03159224.304306
2912061728620True0sequential117548891252175309072.99948356.16232913.2681135pctxrootdxcache1.050.01.543216e+081.00.01.02.655942e+081.129031e+0934.4313470.41906424.220722
xrootd1.050.01.543216e+081.00.01.02.655942e+081.129031e+0957.05568023.07596124.225699
50pctdepot1.0233.07.191384e+081.00.01.01.152109e+095.636058e+09122.0847590.14612994.383458
eos_fuse1.0233.07.191384e+081.00.01.01.152109e+095.636058e+09125.8643710.14631695.105503
work1.0233.07.191384e+081.00.01.01.152109e+095.636058e+09122.4698300.14611794.683736
xcache1.0233.07.191384e+081.00.01.01.152109e+095.636058e+09144.0090017.86597494.551497
xrootd1.0233.07.191384e+081.00.01.01.152109e+095.636058e+09195.64133860.47158094.515743
5pctdepot1.020.06.172862e+071.00.01.01.754889e+085.217531e+0816.7243320.01098713.306242
eos_fuse1.020.06.172862e+071.00.01.01.754889e+085.217531e+0816.5939390.01098513.306679
work1.020.06.172862e+071.00.01.01.754889e+085.217531e+0817.3316040.01105413.388079
xcache1.020.06.172862e+071.00.01.01.754889e+085.217531e+0817.7032810.20344113.307491
xrootd1.020.06.172862e+071.00.01.01.754889e+085.217531e+0860.45692542.94602613.315967
\n", "
" ], "text/plain": [ - " n_files n_columns_read n_events loaded_columns worker_operation_time \\\n", - "0 1 50 154321550 True 0 \n", - "1 1 50 154321550 True 0 \n", - "2 1 50 154321550 True 0 \n", - "3 1 50 154321550 True 0 \n", - "4 1 50 154321550 True 0 \n", - "5 1 50 154321550 True 0 \n", - "6 1 50 154321550 True 0 \n", - "7 1 50 154321550 True 0 \n", - "8 1 50 154321550 True 0 \n", - "9 1 50 154321550 True 0 \n", - "10 1 50 154321550 True 0 \n", - "11 1 50 154321550 True 0 \n", - "12 1 50 154321550 True 0 \n", - "13 1 50 154321550 True 0 \n", - "14 1 50 154321550 True 0 \n", - "15 1 20 61728620 True 0 \n", - "16 1 20 61728620 True 0 \n", - "17 1 20 61728620 True 0 \n", - "18 1 20 61728620 True 0 \n", - "19 1 20 61728620 True 0 \n", - "20 1 20 61728620 True 0 \n", - "21 1 20 61728620 True 0 \n", - "22 1 20 61728620 True 0 \n", - "23 1 20 61728620 True 0 \n", - "24 1 20 61728620 True 0 \n", - "25 1 20 61728620 True 0 \n", - "26 1 20 61728620 True 0 \n", - "27 1 20 61728620 True 0 \n", - "28 1 20 61728620 True 0 \n", - "29 1 20 61728620 True 0 \n", + " n_files n_columns_read n_events \\\n", + "column_setup file_location \n", + "100pct depot 1.0 1249.0 3.854952e+09 \n", + " eos_fuse 1.0 1249.0 3.854952e+09 \n", + " work 1.0 1249.0 3.854952e+09 \n", + " xcache 1.0 1249.0 3.854952e+09 \n", + " xrootd 1.0 1249.0 3.854952e+09 \n", + "10pct depot 1.0 50.0 1.543216e+08 \n", + " eos_fuse 1.0 50.0 1.543216e+08 \n", + " work 1.0 50.0 1.543216e+08 \n", + " xcache 1.0 50.0 1.543216e+08 \n", + " xrootd 1.0 50.0 1.543216e+08 \n", + "50pct depot 1.0 233.0 7.191384e+08 \n", + " eos_fuse 1.0 233.0 7.191384e+08 \n", + " work 1.0 233.0 7.191384e+08 \n", + " xcache 1.0 233.0 7.191384e+08 \n", + " xrootd 1.0 233.0 7.191384e+08 \n", + "5pct depot 1.0 20.0 6.172862e+07 \n", + " eos_fuse 1.0 20.0 6.172862e+07 \n", + " work 1.0 20.0 6.172862e+07 \n", + " xcache 1.0 20.0 6.172862e+07 \n", + " xrootd 1.0 20.0 6.172862e+07 \n", + "\n", + " loaded_columns worker_operation_time n_workers 
\\\n", + "column_setup file_location \n", + "100pct depot 1.0 0.0 1.0 \n", + " eos_fuse 1.0 0.0 1.0 \n", + " work 1.0 0.0 1.0 \n", + " xcache 1.0 0.0 1.0 \n", + " xrootd 1.0 0.0 1.0 \n", + "10pct depot 1.0 0.0 1.0 \n", + " eos_fuse 1.0 0.0 1.0 \n", + " work 1.0 0.0 1.0 \n", + " xcache 1.0 0.0 1.0 \n", + " xrootd 1.0 0.0 1.0 \n", + "50pct depot 1.0 0.0 1.0 \n", + " eos_fuse 1.0 0.0 1.0 \n", + " work 1.0 0.0 1.0 \n", + " xcache 1.0 0.0 1.0 \n", + " xrootd 1.0 0.0 1.0 \n", + "5pct depot 1.0 0.0 1.0 \n", + " eos_fuse 1.0 0.0 1.0 \n", + " work 1.0 0.0 1.0 \n", + " xcache 1.0 0.0 1.0 \n", + " xrootd 1.0 0.0 1.0 \n", "\n", - " executor n_workers compressed_bytes uncompressed_bytes \\\n", - "0 sequential 1 265594188 1129031467 \n", - "1 sequential 1 265594188 1129031467 \n", - "2 sequential 1 265594188 1129031467 \n", - "3 sequential 1 265594188 1129031467 \n", - "4 sequential 1 265594188 1129031467 \n", - "5 sequential 1 265594188 1129031467 \n", - "6 sequential 1 265594188 1129031467 \n", - "7 sequential 1 265594188 1129031467 \n", - "8 sequential 1 265594188 1129031467 \n", - "9 sequential 1 265594188 1129031467 \n", - "10 sequential 1 265594188 1129031467 \n", - "11 sequential 1 265594188 1129031467 \n", - "12 sequential 1 265594188 1129031467 \n", - "13 sequential 1 265594188 1129031467 \n", - "14 sequential 1 265594188 1129031467 \n", - "15 sequential 1 175488912 521753090 \n", - "16 sequential 1 175488912 521753090 \n", - "17 sequential 1 175488912 521753090 \n", - "18 sequential 1 175488912 521753090 \n", - "19 sequential 1 175488912 521753090 \n", - "20 sequential 1 175488912 521753090 \n", - "21 sequential 1 175488912 521753090 \n", - "22 sequential 1 175488912 521753090 \n", - "23 sequential 1 175488912 521753090 \n", - "24 sequential 1 175488912 521753090 \n", - "25 sequential 1 175488912 521753090 \n", - "26 sequential 1 175488912 521753090 \n", - "27 sequential 1 175488912 521753090 \n", - "28 sequential 1 175488912 521753090 \n", - "29 sequential 1 175488912 
521753090 \n", + " compressed_bytes uncompressed_bytes \\\n", + "column_setup file_location \n", + "100pct depot 2.048078e+09 1.156831e+10 \n", + " eos_fuse 2.048078e+09 1.156831e+10 \n", + " work 2.048078e+09 1.156831e+10 \n", + " xcache 2.048078e+09 1.156831e+10 \n", + " xrootd 2.048078e+09 1.156831e+10 \n", + "10pct depot 2.655942e+08 1.129031e+09 \n", + " eos_fuse 2.655942e+08 1.129031e+09 \n", + " work 2.655942e+08 1.129031e+09 \n", + " xcache 2.655942e+08 1.129031e+09 \n", + " xrootd 2.655942e+08 1.129031e+09 \n", + "50pct depot 1.152109e+09 5.636058e+09 \n", + " eos_fuse 1.152109e+09 5.636058e+09 \n", + " work 1.152109e+09 5.636058e+09 \n", + " xcache 1.152109e+09 5.636058e+09 \n", + " xrootd 1.152109e+09 5.636058e+09 \n", + "5pct depot 1.754889e+08 5.217531e+08 \n", + " eos_fuse 1.754889e+08 5.217531e+08 \n", + " work 1.754889e+08 5.217531e+08 \n", + " xcache 1.754889e+08 5.217531e+08 \n", + " xrootd 1.754889e+08 5.217531e+08 \n", "\n", - " time:run_processor time:wait time:decompress column_setup file_location \n", - "0 30.459931 0.030563 24.061388 10pct depot \n", - "1 30.506349 0.031068 24.128228 10pct depot \n", - "2 30.462117 0.030792 24.097522 10pct depot \n", - "3 30.513377 0.030538 24.048350 10pct eos_fuse \n", - "4 30.442081 0.030833 24.080730 10pct eos_fuse \n", - "5 30.533058 0.030703 24.076658 10pct eos_fuse \n", - "6 34.893134 0.030747 24.201373 10pct work \n", - "7 30.539464 0.030596 24.140102 10pct work \n", - "8 30.569113 0.030507 24.164185 10pct work \n", - "9 32.927919 0.320015 24.198597 10pct xcache \n", - "10 33.032840 0.316362 24.168849 10pct xcache \n", - "11 33.051579 0.582236 24.132116 10pct xcache \n", - "12 104.772271 72.213381 24.164070 10pct xrootd \n", - "13 62.811575 30.291163 24.132109 10pct xrootd \n", - "14 55.180227 23.014281 24.131685 10pct xrootd \n", - "15 16.648896 0.010548 13.257322 5pct depot \n", - "16 16.239849 0.010682 13.230332 5pct depot \n", - "17 16.274737 0.010654 13.235920 5pct depot \n", - "18 16.194398 
0.010595 13.220522 5pct eos_fuse \n", - "19 16.217556 0.010699 13.240024 5pct eos_fuse \n", - "20 16.141498 0.010582 13.236267 5pct eos_fuse \n", - "21 16.205629 0.010780 13.284610 5pct work \n", - "22 16.178897 0.010704 13.267368 5pct work \n", - "23 16.341841 0.010818 13.327347 5pct work \n", - "24 17.073631 0.226744 13.267385 5pct xcache \n", - "25 17.148725 0.201830 13.262659 5pct xcache \n", - "26 17.189337 0.216665 13.282749 5pct xcache \n", - "27 69.026720 52.127257 13.272705 5pct xrootd \n", - "28 60.013889 43.198484 13.259263 5pct xrootd \n", - "29 72.999483 56.162329 13.268113 5pct xrootd " + " time:run_processor time:wait time:decompress \n", + "column_setup file_location \n", + "100pct depot 243.654209 0.493106 162.890514 \n", + " eos_fuse 244.384049 0.492532 163.389472 \n", + " work 244.729598 0.492726 163.492540 \n", + " xcache 295.894626 7.241993 162.952794 \n", + " xrootd 396.859249 112.672174 162.690041 \n", + "10pct depot 30.986281 0.031470 24.177480 \n", + " eos_fuse 30.993736 0.031423 24.189793 \n", + " work 31.172322 0.031592 24.304306 \n", + " xcache 34.431347 0.419064 24.220722 \n", + " xrootd 57.055680 23.075961 24.225699 \n", + "50pct depot 122.084759 0.146129 94.383458 \n", + " eos_fuse 125.864371 0.146316 95.105503 \n", + " work 122.469830 0.146117 94.683736 \n", + " xcache 144.009001 7.865974 94.551497 \n", + " xrootd 195.641338 60.471580 94.515743 \n", + "5pct depot 16.724332 0.010987 13.306242 \n", + " eos_fuse 16.593939 0.010985 13.306679 \n", + " work 17.331604 0.011054 13.388079 \n", + " xcache 17.703281 0.203441 13.307491 \n", + " xrootd 60.456925 42.946026 13.315967 " ] }, - "execution_count": 8, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "report.sort_values(by=['column_setup', 'file_location']).reset_index(drop=True)" + "report.groupby(['column_setup', 'file_location']).agg('mean')" ] }, { "cell_type": "code", - "execution_count": 38, - "id": "c0a788da-d160-4c81-95ea-150ef4d9a85e", 
+ "execution_count": 30, + "id": "5150e7dc-b728-43f8-b3a0-6b9843e33e97", "metadata": {}, - "outputs": [ - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ - "import matplotlib.pyplot as plt\n", - "\n", - "report[\"MB/s (compressed)\"] = report.compressed_bytes/report['time:run_processor']/(1024*1024)\n", - "report[\"column_setup\"] = report[\"column_setup\"].apply(lambda x: x.replace('pct', '% of the file'))\n", + "report[\"column_setup\"] = report[\"column_setup\"].apply(lambda x: x.replace('pct', '% of data'))\n", "\n", - "measurements = [\"time:run_processor\", \"time:decompress\", \"time:wait\", \"MB/s (compressed)\"]\n", + "measurements = [\"time:run_processor\", \"time:decompress\", \"time:wait\"]\n", "other_cols = [c for c in report.columns if c not in measurements]\n", "\n", "agg = {}\n", @@ -884,13 +944,31 @@ "\n", "aggregated = report.groupby(other_cols).agg(agg)\n", "\n", - "\n", - "# Reset the index to make it easier to plot\n", "aggregated_reset = aggregated.reset_index()\n", - " \n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "id": "32757e8a-08b4-4396-a2a3-46e9e40bbcb7", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import matplotlib.pyplot as plt\n", "\n", - "# Plotting\n", - "fig, ax = plt.subplots()\n", + "fig, ax = plt.subplots(figsize=(9,5))\n", "\n", "# Heights of the bars\n", "decompress = aggregated_reset[('time:decompress', 'mean')]\n", @@ -905,6 +983,7 @@ "\n", "bar_width = 1\n", "\n", + "# Arrange bars in groups by column setup\n", "group_pos = 0\n", "step = 1.25\n", "group_gap = 1\n", @@ -924,14 +1003,13 @@ "p3 = ax.bar(x.position, run_processor, bar_width, yerr=run_processor_err, label='time:run_processor', \n", " facecolor='none', linewidth=2, edgecolor='tab:blue', ecolor='tab:blue', capsize=4)\n", "\n", - "# Adding labels and title\n", - "# ax.set_xlabel('File location and fraction of the size')\n", "ax.set_xticks(x.position, x.file_location)\n", "ax.set_ylabel('Time (s)')\n", - "ax.set_ylim(0,110)\n", + "# ax.set_ylim(0,110)\n", "ax.set_title('')\n", - "ax.legend()\n", - "plt.xticks(rotation=45)\n", + "# ax.legend()\n", + "ax.legend(handles=[p3, p1, p2], labels=['Total run time', 'Data decompression', 'Idle time (waiting for data)'])\n", + "plt.xticks(rotation=60)\n", "\n", "ax2 = ax.secondary_xaxis('bottom')\n", "ax2_labels = x.groupby(\"column_setup\").mean(numeric_only=True).reset_index()\n", @@ -941,30 +1019,8 @@ "ax2.tick_params(which='both', length=0)\n", "\n", "\n", - "plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "3ff7179c-b407-47b6-b8d0-2bb25ed0649b", - "metadata": {}, - "outputs": [], - "source": [ - "# report[\"event_rate\"] = report.n_events / report.run_processor / report.n_columns_read\n", - "# report[\"data_rate_comp\"] = report.compressed_bytes / report.run_processor\n", - "# report[\"data_rate_uncomp\"] = report.uncompressed_bytes / report.run_processor\n", - "# report[[\"column_setup\", \"n_workers\", \"event_rate\", \"data_rate_comp\", \"data_rate_uncomp\"]]" - ] - }, - { - "cell_type": "code", - "execution_count": null, 
- "id": "cdc0e36b-a557-407e-98b8-9ca52360933b", - "metadata": {}, - "outputs": [], - "source": [ - "# report.compressed_bytes / report.n_events * report.n_columns_read" + "plt.show()\n", + "fig.savefig(f\"{output_path}/plot.pdf\", bbox_inches='tight')" ] }, {