Skip to content

Commit

Permalink
add MultiManager to h5pyd __init__.py (#185)
Browse files Browse the repository at this point in the history
* add MultiManager to h5pyd __init__.py

* fix flake8 warnings

* remove dup code

* print error if sub-folder not present for multi_benchmark
  • Loading branch information
jreadey authored Apr 24, 2024
1 parent 90025e8 commit a54d95a
Show file tree
Hide file tree
Showing 8 changed files with 57 additions and 27 deletions.
2 changes: 1 addition & 1 deletion h5pyd/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
from ._hl.files import File, is_hdf5
from ._hl.folders import Folder
from ._hl.group import Group, SoftLink, ExternalLink, UserDefinedLink, HardLink
from ._hl.dataset import Dataset
from ._hl.dataset import Dataset, MultiManager
from ._hl.table import Table
from ._hl.datatype import Datatype
from ._hl.attrs import AttributeManager
Expand Down
11 changes: 8 additions & 3 deletions h5pyd/_hl/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import base64
import numpy
import os
import logging
from concurrent.futures import ThreadPoolExecutor
from concurrent.futures import as_completed

Expand Down Expand Up @@ -1741,10 +1742,14 @@ class MultiManager():
# Avoid overtaxing HSDS
max_workers = 16

def __init__(self, datasets=None):
def __init__(self, datasets=None, logger=None):
if (datasets is None) or (len(datasets) == 0):
raise ValueError("MultiManager requires non-empty list of datasets")
self.datasets = datasets
if logger is None:
self.log = logging
else:
self.log = logging.getLogger(logger)

def read_dset_tl(self, args):
"""
Expand Down Expand Up @@ -1793,7 +1798,7 @@ def __getitem__(self, args):

except Exception as e:
msg = f"{e}: Defaulting Number of SN_COREs to 1"
self.log.warning(msg)
self.log.debug(msg)
num_endpoints = 1

if (num_endpoints > 1):
Expand Down Expand Up @@ -1848,7 +1853,7 @@ def __setitem__(self, args, vals):
raise ValueError("Malformed port range specification; must be sequential ports")

except Exception as e:
print(f"{e}: Defaulting Number of SNs to 1")
self.log.debug(f"{e}: Defaulting Number of SNs to 1")
num_endpoints = 1

# TODO: Handle the case where some or all datasets share an HTTPConn object
Expand Down
44 changes: 30 additions & 14 deletions test/hl/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@

from __future__ import absolute_import

import sys
import os
import os.path as op
import tempfile
Expand All @@ -39,6 +38,32 @@
del testfile


def getTestFileName(basename, subfolder=None):
"""
Get filepath for a test case given a testname
"""

if config.get("use_h5py"):
filename = "out"
if not op.isdir(filename):
os.mkdir(filename)
if subfolder:
filename = op.join(filename, subfolder)
if not op.isdir(filename):
os.mkdir(filename)
filename = op.join(filename, f"{basename}.h5")
else:
if "H5PYD_TEST_FOLDER" in os.environ:
filename = os.environ["H5PYD_TEST_FOLDER"]
else:
# default to the root folder
filename = "/"
if subfolder:
filename = op.join(filename, subfolder)
filename = op.join(filename, f"{basename}.h5")
return filename


class TestCase(ut.TestCase):

"""
Expand Down Expand Up @@ -201,23 +226,14 @@ def assertNumpyBehavior(self, dset, arr, s):
with self.assertRaises(exc):
dset[s]

def getFileName(self, basename):
def getFileName(self, basename, subfolder=None):
"""
Get filepath for a test case given a testname
"""

if config.get("use_h5py"):
if not op.isdir("out"):
os.mkdir("out")
filename = "out/" + basename + ".h5"
else:
if "H5PYD_TEST_FOLDER" in os.environ:
domain = os.environ["H5PYD_TEST_FOLDER"]
else:
# default to the root folder
domain = "/"
filename = op.join(domain, basename)
filename += ".h5"
# Just call the external function
filename = getTestFileName(basename, subfolder=subfolder)

return filename

def getPathFromDomain(self, domain):
Expand Down
19 changes: 16 additions & 3 deletions test/hl/multi_benchmark.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,15 @@
import numpy as np
import time
import sys

from concurrent.futures import ThreadPoolExecutor
from concurrent.futures import as_completed
import subprocess
import re

from h5pyd._hl.dataset import MultiManager
from h5pyd import MultiManager
import h5pyd as h5py
from common import getTestFileName

# Flag to stop resource usage collection thread after a benchmark finishes
stop_stat_collection = False
Expand Down Expand Up @@ -249,7 +251,17 @@ def run_benchmark(test_name, test_func, stats, datasets, num_iters):
dt = np.int32
stats = {}

fs = [h5py.File("/home/test_user1/h5pyd_multi_bm_" + str(i), mode='w') for i in range(count)]
fs = []

for i in range(count):
filename = getTestFileName(f"bm_{i:04d}", subfolder="multi_bm")
try:
f = h5py.File(filename, mode='w')
except IOError:
print(f"unable to create domain at: {filename} - does the parent folder exist?")
sys.exit(1)
fs.append(f)

data_in = np.zeros(shape, dtype=dt)
datasets = [f.create_dataset("data", shape, dtype=dt, data=data_in) for f in fs]

Expand All @@ -266,7 +278,8 @@ def run_benchmark(test_name, test_func, stats, datasets, num_iters):

print("Testing with shared HTTP connection...")

f = h5py.File("/home/test_user1/h5pyd_multi_bm_shared", mode='w')
filename = getTestFileName("bm_shared", subfolder="multi_bm")
f = h5py.File(filename, mode='w')
datasets = [f.create_dataset("data" + str(i), data=data_in, dtype=dt) for i in range(count)]

run_benchmark("Read Multi (Shared HttpConn)", read_datasets_multi, stats, datasets, num_iters)
Expand Down
4 changes: 2 additions & 2 deletions test/hl/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,10 @@
import sys
import numpy as np
import platform
import warnings

from common import ut, TestCase
from h5pyd._hl.dataset import MultiManager
import config
from h5pyd import MultiManager

if config.get("use_h5py"):
from h5py import File, Dataset
Expand All @@ -39,6 +38,7 @@

def is_empty_dataspace(obj):
shape_json = obj.shape_json

if "class" not in shape_json:
raise KeyError()
if shape_json["class"] == 'H5S_NULL':
Expand Down
2 changes: 0 additions & 2 deletions test/hl/test_dataset_extend.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,6 @@
##############################################################################

import logging
import numpy as np
import math

import config

Expand Down
1 change: 0 additions & 1 deletion test/hl/test_dataset_fancyselect.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
##############################################################################

import numpy as np
import math

import config

Expand Down
1 change: 0 additions & 1 deletion test/hl/test_datatype.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
##############################################################################

import numpy as np
import math
import logging
import config

Expand Down

0 comments on commit a54d95a

Please sign in to comment.