diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml new file mode 100644 index 00000000..440a7e7d --- /dev/null +++ b/.github/workflows/main.yml @@ -0,0 +1,90 @@ +name: h5pyd CI + +on: + push: + pull_request: + branches: [master] + +env: + ADMIN_PASSWORD: admin + ADMIN_USERNAME: admin + USER_NAME: test_user1 + USER_PASSWORD: test + USER2_NAME: test_user2 + USER2_PASSWORD: test + HSDS_USERNAME: test_user1 + HSDS_PASSWORD: test + HSDS_ENDPOINT: http+unix://%2Ftmp%2Fhs%2Fsn_1.sock + ROOT_DIR: ${{github.workspace}}/hsds/hsds_root + BUCKET_NAME: hsds_bucket + HS_ENDPOINT: http+unix://%2Ftmp%2Fhs%2Fsn_1.sock + H5PYD_TEST_FOLDER: /home/test_user1/h5pyd_tests/ + +jobs: + build-and-test: + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest,] + python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"] + + runs-on: ${{ matrix.os }} + steps: + - uses: actions/checkout@v3 + + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v3 + with: + python-version: ${{ matrix.python-version }} + + - name: Install dependencies + shell: bash + run: | + python -m pip install --upgrade pip + python -m pip install flake8 pytest + if [ -f requirements.txt ]; then pip install -r requirements.txt; fi + + - name: Lint with flake8 + shell: bash + run: | + flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics # stop the build if there are Python syntax errors or undefined names + flake8 . --count --max-complexity=10 --max-line-length=127 --statistics # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide + + - name: Install package + shell: bash + run: | + pip install -e . + + - name: Checkout HSDS + uses: actions/checkout@v3 + with: + repository: HDFGroup/hsds + path: ${{github.workspace}}/hsds + + - name: Install HSDS + shell: bash + run: | + cd ${{github.workspace}}/hsds + pip install -e . + + - name: Start HSDS + shell: bash + run: | + cd ${{github.workspace}}/hsds + mkdir hsds_root + mkdir hsds_root/hsds_bucket + cp admin/config/groups.default admin/config/groups.txt + cp admin/config/passwd.default admin/config/passwd.txt + ./runall.sh --no-docker 1 & + sleep 11 # let the nodes get ready + python tests/integ/setup_test.py + + - name: Create h5pyd test folder + shell: bash + run: | + HS_USERNAME=test_user1 HS_PASSWORD=test TEST2_USERNAME=test_user1 TEST2_PASSWORD=test hstouch -v /home/test_user1/h5pyd_tests/ + + - name: Run h5pyd tests + shell: bash + run: | + HS_USERNAME=test_user1 HS_PASSWORD=test TEST2_USERNAME=test_user1 TEST2_PASSWORD=test python testall.py diff --git a/docs/conf.py b/docs/conf.py index a05879b9..6dde65b6 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -16,9 +16,9 @@ # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. 
extensions = [ - 'sphinx.ext.intersphinx', - 'sphinx.ext.extlinks', - 'sphinx.ext.mathjax', + 'sphinx.ext.intersphinx', + 'sphinx.ext.extlinks', + 'sphinx.ext.mathjax', ] # intersphinx_mapping = {'low': ('https://api.h5py.org', None)} diff --git a/examples/read_example.py b/examples/read_example.py index 835715d9..eeb66ba9 100755 --- a/examples/read_example.py +++ b/examples/read_example.py @@ -12,10 +12,12 @@ import h5pyd as h5py + def visit_item(name): print("visit:", name) return None + def find_g1_2(name): print("visit:", name) if name.endswith("g1.2"): @@ -26,12 +28,13 @@ def visit_item_obj(name, obj): print("visit:", name, obj.id.id) return None + print("version:", h5py.version.version) # this is the path specified in the "post-install instructions" # see: "Test Data Setup" in: # https://github.com/HDFGroup/hsds/blob/master/docs/post_install.md -DOMAIN_PATH="/home/test_user1/test/tall.h5" +DOMAIN_PATH = "/home/test_user1/test/tall.h5" print("opening domain:", DOMAIN_PATH) f = h5py.File(DOMAIN_PATH, "r") @@ -85,6 +88,3 @@ def visit_item_obj(name, obj): print("search g1.2:") f.visit(find_g1_2) - - - diff --git a/examples/write_example.py b/examples/write_example.py index fbad8696..13f72289 100644 --- a/examples/write_example.py +++ b/examples/write_example.py @@ -25,7 +25,6 @@ CHUNKS = (1500, 275) - if len(sys.argv) == 1 or sys.argv[1] in ("-h", "--help"): s = f"usage: python {sys.argv[0]} " s += "[--ncols=n] " @@ -138,4 +137,4 @@ else: print("passed!") -f.close() \ No newline at end of file +f.close() diff --git a/h5pyd/__init__.py b/h5pyd/__init__.py index 089e3ba0..572c10cc 100644 --- a/h5pyd/__init__.py +++ b/h5pyd/__init__.py @@ -34,12 +34,12 @@ __doc__ = \ -""" + """ This is the h5pyd package, a Python interface to the HDF REST Server. Version %s -""" % (version.version) + """ % (version.version) def enable_ipython_completer(): diff --git a/h5pyd/_apps/config.py b/h5pyd/_apps/config.py index 0f42c594..c1d096c9 100755 --- a/h5pyd/_apps/config.py +++ b/h5pyd/_apps/config.py @@ -38,7 +38,7 @@ "flags": ["--api_key",], "help": "user api key", "choices": ["API_KEY"] - }, + }, "hs_bucket": { "default": None, "flags": ["--bucket",], @@ -67,11 +67,12 @@ "default": False, "flags": ["--ignore",], "help": "don't exit on error" - } + } } hscmds = ("hsinfo", "hsconfigure", "hsls", "hstouch", "hsload", "hsget", "hsacl", "hsrm", "hsdiff") + class Config: """ User Config state @@ -163,7 +164,7 @@ def setitem(self, name, value, flags=None, choices=None, help=None): """ Set a config item """ if name not in self._names: self._names.append(name) - self._values[name] = value + self._values[name] = value if flags is not None: self._flags[name] = flags for flag in flags: @@ -175,7 +176,7 @@ def setitem(self, name, value, flags=None, choices=None, help=None): def __setitem__(self, name, value): self.setitem(name, value) - + def __len__(self): return len(self._names) @@ -212,15 +213,14 @@ def get_see_also(self, this_cmd): msg += f"{cmd}, " msg = msg[:-2] # remove trailing comma return msg - def get_help_message(self, name): - help_text= self.get_help(name) + help_text = self.get_help(name) flags = self.get_flags(name) choices = self.get_choices(name) if not help_text or len(flags) == 0: return None - + msg = flags[0] for i in range(1, len(flags)): msg += f", {flags[i]}" @@ -228,19 +228,18 @@ def get_help_message(self, name): if len(choices) == 1: msg += f" {choices[0]}" else: - msg += " {" + msg += " {" for choice in choices: msg += f"{choice}|" msg = msg[:-1] msg += "}" if len(msg) < 40: - pad = " "*(40 
- len(msg)) + pad = " " * (40 - len(msg)) msg += pad - + msg += f" {help_text}" - + return msg - def get_nargs(self, name): choices = self._choices.get(name) @@ -260,7 +259,7 @@ def get_names(self): def set_cmd_flags(self, args, allow_post_flags=False): """ process any command line options - return any place argument as a list + return any place argument as a list """ options = [] argn = 0 @@ -268,7 +267,7 @@ def set_cmd_flags(self, args, allow_post_flags=False): arg = args[argn] val = None if len(args) > argn + 1: - val = args[argn+1] + val = args[argn + 1] if not arg.startswith("-"): options.append(arg) argn += 1 @@ -299,7 +298,7 @@ def set_cmd_flags(self, args, allow_post_flags=False): self._values[name] = val argn += 2 return options - + def get_loglevel(self): val = self._values["loglevel"] val = val.upper() @@ -329,13 +328,3 @@ def print(self, msg): logging.info(msg) if self._values.get("verbose"): print(msg) - - - - - - - - - - diff --git a/h5pyd/_apps/hsacl.py b/h5pyd/_apps/hsacl.py index 3fe3ec03..0da3bf4d 100755 --- a/h5pyd/_apps/hsacl.py +++ b/h5pyd/_apps/hsacl.py @@ -20,6 +20,7 @@ cfg = Config() + # # log error and abort app # @@ -31,12 +32,13 @@ def abort(msg): logging.error("exiting program with return code -1") sys.exit(-1) + # # get given ACL, return None if not found # def getACL(f, username="default"): try: - acl = f.getACL(username) + acl = f.getACL(username) except IOError as ioe: if ioe.errno == 403: print("No permission to read ACL for this domain") @@ -54,6 +56,7 @@ def getACL(f, username="default"): del acl["domain"] return acl + # # Usage # @@ -71,7 +74,7 @@ def usage(): for name in option_names: help_msg = cfg.get_help_message(name) if help_msg: - print(f" {help_msg}") + print(f" {help_msg}") print("") print("Arguments:") @@ -95,9 +98,8 @@ def usage(): sys.exit() - def main(): - perm_abvr = {'c':'create', 'r': 'read', 'u': 'update', 'd': 'delete', 'e': 'readACL', 'p':'updateACL'} + perm_abvr = {'c': 'create', 'r': 'read', 'u': 'update', 'd': 'delete', 'e': 'readACL', 'p': 'updateACL'} fields = ('username', 'create', 'read', 'update', 'delete', 'readACL', 'updateACL') domain = None perm = None @@ -108,13 +110,12 @@ def main(): # additional options cfg.setitem("help", False, flags=["-h", "--help"], help="this message") - try: cmdline_args = cfg.set_cmd_flags(sys.argv[1:], allow_post_flags=True) except ValueError as ve: print(ve) usage() - + if len(cmdline_args) == 0: # need a domain usage() @@ -176,9 +177,11 @@ def main(): # open the domain or folder try: if domain[-1] == '/': - f = h5pyd.Folder(domain, mode=mode, endpoint=cfg["hs_endpoint"], username=cfg["hs_username"], password=cfg["hs_password"], bucket=cfg["hs_bucket"]) + f = h5pyd.Folder(domain, mode=mode, endpoint=cfg["hs_endpoint"], + username=cfg["hs_username"], password=cfg["hs_password"], bucket=cfg["hs_bucket"]) else: - f = h5pyd.File(domain, mode=mode, endpoint=cfg["hs_endpoint"], username=cfg["hs_username"], password=cfg["hs_password"], bucket=cfg["hs_bucket"]) + f = h5pyd.File(domain, mode=mode, endpoint=cfg["hs_endpoint"], + username=cfg["hs_username"], password=cfg["hs_password"], bucket=cfg["hs_bucket"]) except IOError as ioe: if ioe.errno in (404, 410): abort("domain not found") @@ -239,9 +242,10 @@ def main(): else: abort(f"Unexpected error: {ioe}") print("%015s %08s %08s %08s %08s %08s %08s " % fields) - print("-"*80) + print("-" * 80) for acl in acls: - vals = (acl["userName"], acl["create"], acl["read"], acl["update"], acl["delete"], acl["readACL"], acl["updateACL"]) + vals = (acl["userName"], 
acl["create"], acl["read"], + acl["update"], acl["delete"], acl["readACL"], acl["updateACL"]) print("%015s %08s %08s %08s %08s %08s %08s " % vals) else: header_printed = False # don't print header until we have at least one ACL @@ -250,9 +254,10 @@ def main(): acl = f.getACL(username) if not header_printed: print("%015s %08s %08s %08s %08s %08s %08s " % fields) - print("-"*80) + print("-" * 80) header_printed = True - vals = (acl["userName"], acl["create"], acl["read"], acl["update"], acl["delete"], acl["readACL"], acl["updateACL"]) + vals = (acl["userName"], acl["create"], acl["read"], + acl["update"], acl["delete"], acl["readACL"], acl["updateACL"]) print("%015s %08s %08s %08s %08s %08s %08s " % vals) except IOError as ioe: if ioe.errno == 403: @@ -267,5 +272,6 @@ def main(): f.close() + if __name__ == "__main__": main() diff --git a/h5pyd/_apps/hsconfigure.py b/h5pyd/_apps/hsconfigure.py index 3bb44228..72064fab 100644 --- a/h5pyd/_apps/hsconfigure.py +++ b/h5pyd/_apps/hsconfigure.py @@ -6,6 +6,7 @@ else: from .config import Config + # # input function that works with Python 2 or 3 # @@ -13,6 +14,7 @@ def get_input(prompt): result = input(prompt) return result + # # Save configuration file # @@ -39,6 +41,7 @@ def saveConfig(username, password, endpoint, api_key): else: file.write("hs_api_key = \n") + # # Check to see if we can get a response from the server # @@ -73,6 +76,8 @@ def pingServer(username, password, endpoint, api_key): print("Unexpected response from server") return False return True + + # # Main # @@ -138,5 +143,6 @@ def main(): if quit in ("Y", "y"): break + if __name__ == "__main__": main() diff --git a/h5pyd/_apps/hscopy.py b/h5pyd/_apps/hscopy.py index 05141333..77c3e8d9 100755 --- a/h5pyd/_apps/hscopy.py +++ b/h5pyd/_apps/hscopy.py @@ -23,6 +23,7 @@ cfg = Config() + # ---------------------------------------------------------------------------------- def usage(): option_names = cfg.get_names() @@ -35,12 +36,12 @@ def usage(): print(" SOURCE: HSDS domain (absolute path with or without 'hdf5:// prefix)") print(" DEST: HSDS domain or folder (path as above ending in '/')") print("") - + print("Options:") for name in option_names: help_msg = cfg.get_help_message(name) if help_msg: - print(f" {help_msg}") + print(f" {help_msg}") print("") print("examples:") print(f" {cmd} /myfolder/orig.h5 /myfolder/copy.h5") @@ -66,17 +67,18 @@ def getFile(domain): if not bucket: bucket = cfg["hs_bucket"] - fh = h5pyd.File(domain, - mode='r', - endpoint=endpoint, - username=username, - password=password, - bucket=bucket) + fh = h5pyd.File(domain, + mode='r', + endpoint=endpoint, + username=username, + password=password, + bucket=bucket) return fh + def createFile(domain, linked_domain=None, no_clobber=False): - #print("createFile", domain) + # print("createFile", domain) username = cfg["des_username"] if not username: username = cfg["hs_username"] @@ -90,17 +92,17 @@ def createFile(domain, linked_domain=None, no_clobber=False): if not bucket: bucket = cfg["hs_bucket"] if cfg["no_clobber"]: - mode= "x" + mode = "x" else: - mode="w" - - fh = h5pyd.File(domain, - mode=mode, - endpoint=endpoint, - username=username, - password=password, - bucket=bucket) - + mode = "w" + + fh = h5pyd.File(domain, + mode=mode, + endpoint=endpoint, + username=username, + password=password, + bucket=bucket) + return fh @@ -109,16 +111,24 @@ def createFile(domain, linked_domain=None, no_clobber=False): # ---------------------------------------------------------------------------------- def main(): - 
cfg.setitem("no_clobber", False, flags=["-n", "--no-clobber"], help="do not overwrite any domains") - cfg.setitem("src_endpoint", None, flags=["--src-endpoint"], choices=["ENDPOINT",], help="server endpoint for source domain") - cfg.setitem("src_username", False, flags=["--src-user"], choices=["USERNAME",], help="user name credential for source domain") - cfg.setitem("src_password", False, flags=["--src-password"], choices=["PASSWORD",], help="password credential for source domain") - cfg.setitem("src_bucket", False, flags=["--src-bucket"], choices=["BUCKET"], help="storage bucket for source domain") - cfg.setitem("des_endpoint", None, flags=["--des-endpoint"], choices=["ENDPOINT",], help="server endpoint for dest domain") - cfg.setitem("des_username", False, flags=["--des-user"], choices=["USERNAME",], help="user name credential for dest domain") - cfg.setitem("des_password", False, flags=["--des-password"], choices=["PASSWORD",], help="password credential for dest domain") + cfg.setitem("no_clobber", False, flags=["-n", "--no-clobber"], help="do not overwrite any domains") + cfg.setitem("src_endpoint", None, flags=["--src-endpoint"], choices=["ENDPOINT",], + help="server endpoint for source domain") + cfg.setitem("src_username", False, flags=["--src-user"], choices=["USERNAME",], + help="user name credential for source domain") + cfg.setitem("src_password", False, flags=["--src-password"], choices=["PASSWORD",], + help="password credential for source domain") + cfg.setitem("src_bucket", False, flags=["--src-bucket"], choices=["BUCKET"], + help="storage bucket for source domain") + cfg.setitem("des_endpoint", None, flags=["--des-endpoint"], choices=["ENDPOINT",], + help="server endpoint for dest domain") + cfg.setitem("des_username", False, flags=["--des-user"], choices=["USERNAME",], + help="user name credential for dest domain") + cfg.setitem("des_password", False, flags=["--des-password"], choices=["PASSWORD",], + help="password credential for dest domain") cfg.setitem("des_bucket", False, flags=["--des-bucket"], choices=["BUCKET"], help="storage bucket for dest domain") - cfg.setitem("compress", 0, flags=["-z",], choices=["LEVEL",], help="compression level from 0 (no compression) to 9 (highest)") + cfg.setitem("compress", 0, flags=["-z",], choices=["LEVEL",], + help="compression level from 0 (no compression) to 9 (highest)") cfg.setitem("nodata", False, flags=["--nodata",], help="do not copy dataset data") cfg.setitem("help", False, flags=["-h", "--help"], help="this message") @@ -130,7 +140,7 @@ def main(): if len(domains) < 2: usage() - + src_domain = domains[0] des_domain = domains[1] @@ -147,7 +157,7 @@ def main(): msg = "Compression Level must be int between 0 and 9" logging.error(msg) sys.exit(msg) - + # setup logging logfname = cfg["logfile"] loglevel = cfg.get_loglevel() @@ -177,12 +187,12 @@ def main(): sys.exit(msg) if des_domain[-1] == "/": - # pull out the basename of src and add it to the + # pull out the basename of src and add it to the # end of des_domain fields = src_domain.split("/") des_domain += fields[-1] cfg.print(f"using {des_domain} for destination") - + # get a handle to input file try: fin = getFile(src_domain) @@ -191,7 +201,6 @@ def main(): logging.error(msg) sys.exit(msg) - try: fout = createFile(des_domain) except IOError as ioe: diff --git a/h5pyd/_apps/hsdel.py b/h5pyd/_apps/hsdel.py index 1a3b5352..2ec67d10 100644 --- a/h5pyd/_apps/hsdel.py +++ b/h5pyd/_apps/hsdel.py @@ -13,6 +13,7 @@ cfg = Config() + def getFolder(domain, mode='r'): username = 
cfg["hs_username"] password = cfg["hs_password"] @@ -21,13 +22,13 @@ def getFolder(domain, mode='r'): folder = h5py.Folder(domain, mode=mode, endpoint=endpoint, username=username, password=password, bucket=bucket) return folder + def exitUnlessIgnore(msg): if cfg["ignore"]: return sys.exit(msg) - def deleteDomain(domain): # get handle to parent folder @@ -39,7 +40,7 @@ def deleteDomain(domain): base_name = op.basename(path) if len(parent_domain) < 2: - #sys.exit("can't get parent domain") + # sys.exit("can't get parent domain") parent_domain = '/' if not parent_domain.endswith('/'): @@ -102,7 +103,7 @@ def deleteDomain(domain): else: msg = f"Domain: {domain} deleted" cfg.print(msg) - + # # Usage @@ -121,7 +122,7 @@ def usage(): for name in option_names: help_msg = cfg.get_help_message(name) if help_msg: - print(f" {help_msg}") + print(f" {help_msg}") print("") print("Examples:") print(f" {cmd} /home/myfolder/file1.h5 /home/myfolder/file2.h5") @@ -131,6 +132,7 @@ def usage(): print("") sys.exit() + # # Main # diff --git a/h5pyd/_apps/hsdiff.py b/h5pyd/_apps/hsdiff.py index 7f8a34dc..8f6cac74 100755 --- a/h5pyd/_apps/hsdiff.py +++ b/h5pyd/_apps/hsdiff.py @@ -35,13 +35,14 @@ cfg = Config() + def getFile(domain, mode="r"): username = cfg["hs_username"] password = cfg["hs_password"] endpoint = cfg["hs_endpoint"] bucket = cfg["hs_bucket"] fh = h5pyd.File(domain, mode=mode, endpoint=endpoint, username=username, - password=password, bucket=bucket, use_cache=True) + password=password, bucket=bucket, use_cache=True) return fh @@ -347,10 +348,9 @@ def diff_dataset(src, ctx): print("quiet output differ") ctx["differences"] += 1 return False - if src.chunks is None: - # assume that the dataset is small enough that we can + # assume that the dataset is small enough that we can # read all the values into memory. 
# TBD: use some sort of pseudo-chunk iteration for large + contiguous datasets @@ -389,12 +389,12 @@ if isinstance(arr_tgt, np.ndarray): is_equal = np.array_equal(arr_src, arr_tgt) else: - is_equal = False # type not the same + is_equal = False # type not the same else: # just compare the objects directly if arr_src != arr_tgt: is_equal = False - + if not is_equal: msg = "values for dataset {} differ for slice: {}".format(src.name, s) logging.info(msg) @@ -461,7 +461,7 @@ for name in option_names: help_msg = cfg.get_help_message(name) if help_msg: - print(f" {help_msg}") + print(f" {help_msg}") print("") print("Examples:") print(f" {cmd} myfile.h5 /home/myfolder/myfile.h5") @@ -499,7 +499,7 @@ usage() if cfg["quiet"] and cfg["verbose"]: - msg = "--quiet and --verbose options can't be used together" + msg = "--quiet and --verbose options can't be used together" sys.exit(msg) if len(args) < 2: @@ -514,7 +514,6 @@ logging.basicConfig(filename=logfname, format='%(levelname)s %(asctime)s %(message)s', level=loglevel) logging.debug(f"set log_level to {loglevel}") - rc = 0 s3 = None # s3fs instance @@ -573,7 +572,6 @@ kwargs["noattr"] = cfg["noattr"] kwargs["quiet"] = cfg["quiet"] rc = diff_file(fin, fout, **kwargs) - if not cfg["quiet"] and rc > 0: print(f"{rc} differences found") diff --git a/h5pyd/_apps/hsget.py b/h5pyd/_apps/hsget.py index 3e0cb789..b78784cc 100755 --- a/h5pyd/_apps/hsget.py +++ b/h5pyd/_apps/hsget.py @@ -27,9 +27,10 @@ from .config import Config from .utillib import load_file -cfg = Config() # config object +cfg = Config() # config object -#---------------------------------------------------------------------------------- + +# ---------------------------------------------------------------------------------- def usage(): option_names = cfg.get_names() cmd = cfg.get_cmd() @@ -45,7 +46,7 @@ for name in option_names: help_msg = cfg.get_help_message(name) if help_msg: - print(f" {help_msg}") + print(f" {help_msg}") print("") print("Examples:") print(f" {cmd} /shared/tall.h5 tall.h5") @@ -56,13 +57,12 @@ print("") sys.exit() -#end print_usage +# end print_usage -#---------------------------------------------------------------------------------- +# ---------------------------------------------------------------------------------- def main(): - - cfg.setitem("no_clobber", False, flags=["-n", "--no-clobber"], help="do not overwrite target") + cfg.setitem("no_clobber", False, flags=["-n", "--no-clobber"], help="do not overwrite target") cfg.setitem("nodata", False, flags=["--nodata",], help="do not copy dataset data") cfg.setitem("help", False, flags=["-h", "--help"], help="this message") @@ -87,7 +87,7 @@ logfname = cfg["logfile"] loglevel = cfg.get_loglevel() logging.basicConfig(filename=logfname, format='%(levelname)s %(asctime)s %(message)s', level=loglevel) - logging.debug(f"set log_level to {loglevel}") + logging.debug(f"set log_level to {loglevel}") logging.info(f"source domain: {src_domain}") logging.info(f"target file: {des_file}") @@ -129,6 +129,8 @@ except KeyboardInterrupt: logging.error('Aborted by user via keyboard interrupt.') sys.exit(1) -#__main__ + + +# __main__ if __name__ == "__main__": main() diff --git a/h5pyd/_apps/hsinfo.py b/h5pyd/_apps/hsinfo.py index 7a65c83b..993aa6f5 100644 --- a/h5pyd/_apps/hsinfo.py +++ b/h5pyd/_apps/hsinfo.py @@ -26,6 +26,7 @@ cfg = Config() + # # Usage # @@ -38,12 +39,12 @@
print("Description:") print(" Get status information from server") print("") - + print("Options:") for name in option_names: help_msg = cfg.get_help_message(name) if help_msg: - print(f" {help_msg}") + print(f" {help_msg}") print("") print("examples:") print(f" {cmd} -e http://hsdshdflab.hdfgroup.org") @@ -52,8 +53,9 @@ def usage(): print("") sys.exit() + # -# +# getUpTime # def getUpTime(start_time): now = int(time.time()) @@ -187,13 +189,12 @@ def main(): if domains: sys.exit("Use the hsstat command to get information about about a folder or domain ") - if not cfg["hs_endpoint"]: logging.error("endpoint not set") usage() - + getServerInfo(cfg) - + if __name__ == "__main__": main() diff --git a/h5pyd/_apps/hsload.py b/h5pyd/_apps/hsload.py index 176187af..c0dae7c4 100755 --- a/h5pyd/_apps/hsload.py +++ b/h5pyd/_apps/hsload.py @@ -40,6 +40,7 @@ cfg = Config() + # ---------------------------------------------------------------------------------- def abort(msg): logging.error(msg) @@ -48,6 +49,7 @@ def abort(msg): logging.error("exiting program with return code -1") sys.exit(-1) + # ---------------------------------------------------------------------------------- def usage(): option_names = cfg.get_names() @@ -66,7 +68,7 @@ def usage(): for name in option_names: help_msg = cfg.get_help_message(name) if help_msg: - print(f" {help_msg}") + print(f" {help_msg}") print("") print("Note about --link option:") print(" --link enables just the source HDF5 metadata to be ingested while the dataset data") @@ -81,7 +83,6 @@ def usage(): print(cfg.get_see_also(cmd)) print("") sys.exit(-1) - # end print_usage @@ -92,18 +93,25 @@ def main(): COMPRESSION_FILTERS = ("blosclz", "lz4", "lz4hc", "snappy", "gzip", "zstd") - s3 = None # S3FS instance + s3 = None # S3FS instance cfg.setitem("append", False, flags=["-a", "--append"], help="append to existing domain") - cfg.setitem("extend_dim", None, flags=["--extend",], choices=["DIMSCALE",], help="extend along given dimensionscale") - cfg.setitem("extend_offset", None, flags=["--extend-offset"], choices=["N",], help="write data at index n along extended dimension") - cfg.setitem("no_clobber", False, flags=["-n", "--no-clobber"], help="do not overwrite target") + cfg.setitem("extend_dim", None, flags=["--extend",], choices=["DIMSCALE",], + help="extend along given dimensionscale") + cfg.setitem("extend_offset", None, flags=["--extend-offset"], choices=["N",], + help="write data at index n along extended dimension") + cfg.setitem("no_clobber", False, flags=["-n", "--no-clobber"], help="do not overwrite target") cfg.setitem("nodata", False, flags=["--nodata",], help="do not copy dataset data") - cfg.setitem("z", None, flags=["-z",], choices=["N",], help="apply compression filter to any non-compressed datasets, n: [0-9]") - cfg.setitem("link", None, flags=["--link",], help="Link to dataset data (sourcefile given as /) or s3uri") - cfg.setitem("fastlink", None, flags=["--fastlink",], help="Link to dataset data without initializing chunk locations (will be set server side)") - cfg.setitem("linkpath", None, flags=["--linkpath",], choices=["PATH_URI",], help="Use the given URI for the link references rather than the src path") - cfg.setitem("compression", None, flags=["--compression",], choices=COMPRESSION_FILTERS, help="use the given compression algorithm for -z option (lz4 is default)") + cfg.setitem("z", None, flags=["-z",], choices=["N",], + help="apply compression filter to any non-compressed datasets, n: [0-9]") + cfg.setitem("link", None, flags=["--link",], + 
help="Link to dataset data (sourcefile given as /) or s3uri") + cfg.setitem("fastlink", None, flags=["--fastlink",], + help="Link to dataset data without initializing chunk locations (will be set server side)") + cfg.setitem("linkpath", None, flags=["--linkpath",], choices=["PATH_URI",], + help="Use the given URI for the link references rather than the src path") + cfg.setitem("compression", None, flags=["--compression",], choices=COMPRESSION_FILTERS, + help="use the given compression algorithm for -z option (lz4 is default)") cfg.setitem("ignorefilters", False, flags=["--ignore-filters"], help="ignore any filters used by source dataset") cfg.setitem("retries", 3, flags=["--retries",], choices=["N",], help="Set number of server retry attempts") cfg.setitem("help", False, flags=["-h", "--help"], help="this message") @@ -124,7 +132,7 @@ def main(): logfname = cfg["logfile"] loglevel = cfg.get_loglevel() logging.basicConfig(filename=logfname, format='%(levelname)s %(asctime)s %(message)s', level=loglevel) - logging.debug(f"set log_level to {loglevel}") + logging.debug(f"set log_level to {loglevel}") if cfg["linkpath"] and not cfg["link"]: abort("--linkpath option can only be used with --link") @@ -146,7 +154,7 @@ def main(): dataload = None else: dataload = "ingest" - + logging.info(f"source files: {src_files}") logging.info(f"target domain: {domain}") if len(src_files) > 1 and domain[-1] != "/": @@ -157,14 +165,12 @@ def main(): logging.info("checking libversion") if ( - h5py.version.version_tuple.major == 2 - and h5py.version.version_tuple.minor < 10 + h5py.version.version_tuple.major == 2 and h5py.version.version_tuple.minor < 10 ): abort("link option requires h5py version 2.10 or higher") - + if h5py.version.hdf5_version_tuple < (1, 10, 6): abort("link option requires h5py version 2.10 or higher") - try: @@ -198,7 +204,7 @@ def main(): fin = h5py.File(s3.open(src_file, "rb"), moe="r") except IOError as ioe: abort(f"Error opening file {src_file}: {ioe}") - + else: if cfg["link"] or cfg["fastlink"]: if op.isabs(src_file) and not cfg["linkpath"]: @@ -241,12 +247,12 @@ def main(): if cfg["linkpath"]: # now that we have a handle to the source file, - # repurpose s3path to the s3uri that will actually get stored + # repurpose s3path to the s3uri that will actually get stored # in the target domain s3path = cfg["linkpath"] if cfg["no_clobber"]: - if cfg["append"]: + if cfg["append"]: # no need to check for clobber if not in append mode no_clobber = True else: @@ -272,7 +278,6 @@ def main(): else: compression_opts = None - # do the actual load kwargs = { "verbose": cfg["verbose"], @@ -286,7 +291,7 @@ def main(): "extend_offset": cfg["extend_offset"], "ignore_error": cfg["ignore_error"], "no_clobber": no_clobber - } + } load_file(fin, fout, **kwargs) msg = f"File {src_file} uploaded to domain: {tgt}" diff --git a/h5pyd/_apps/hsls.py b/h5pyd/_apps/hsls.py index 59541075..1b234f5d 100644 --- a/h5pyd/_apps/hsls.py +++ b/h5pyd/_apps/hsls.py @@ -16,6 +16,7 @@ cfg = Config() + def intToStr(n): if cfg["human_readable"]: s = "{:,}".format(n) @@ -117,7 +118,6 @@ def dump(name, obj, visited=None): if class_name in ("Dataset", "Table"): is_dataset = True - if is_dataset: desc = getShapeText(obj) obj_id = obj.id.id @@ -141,7 +141,7 @@ def dump(name, obj, visited=None): if cfg["verbose"] and is_dataset and obj.shape is not None \ and obj.chunks is not None: chunk_size = obj.dtype.itemsize - + if isinstance(obj.id.layout, dict): # H5D_CHUNKED_REF layout chunk_dims = obj.id.layout["dims"] @@ -185,7 +185,7 @@ def 
dump(name, obj, visited=None): if num_chunks is not None and allocated_size is not None: fstr = " {0:>32}: {1} {2} bytes, {3}/{4} {5} chunks" - s = fstr.format("Chunks", chunk_dims, intToStr(chunk_size), intToStr(num_chunks), + s = fstr.format("Chunks", chunk_dims, intToStr(chunk_size), intToStr(num_chunks), intToStr(max_chunk_count), chunk_type) print(s) if dset_size > 0: @@ -206,7 +206,7 @@ def dump(name, obj, visited=None): print(fstr.format("Chunks", chunk_dims, intToStr(chunk_size))) # show filters (if any) - # currently HSDS only supports the shuffle filter (not fletcher32 or + # currently HSDS only supports the shuffle filter (not fletcher32 or # scaleoffset), so just check for shuffle and whatever compressor may # be applied filter_number = 0 @@ -294,7 +294,7 @@ def getFolder(domain): username = cfg["hs_username"] password = cfg["hs_password"] endpoint = cfg["hs_endpoint"] - bucket = cfg["hs_bucket"] + bucket = cfg["hs_bucket"] pattern = cfg["pattern"] query = cfg["query"] if cfg["verbose"]: @@ -303,7 +303,7 @@ def getFolder(domain): verbose = False batch_size = 100 # use smaller batchsize for interactively listing of large collections d = h5py.Folder(domain, endpoint=endpoint, username=username, verbose=verbose, - password=password, bucket=bucket, pattern=pattern, query=query, batch_size=batch_size) + password=password, bucket=bucket, pattern=pattern, query=query, batch_size=batch_size) return d @@ -316,6 +316,7 @@ def getFile(domain): password=password, bucket=bucket, use_cache=True) return fh + def isFile(domain): username = cfg["hs_username"] password = cfg["hs_password"] @@ -323,8 +324,7 @@ def isFile(domain): bucket = cfg["hs_bucket"] return h5py.is_hdf5(domain, endpoint=endpoint, username=username, - password=password, bucket=bucket) - + password=password, bucket=bucket) def visitDomains(domain, depth=1): @@ -368,8 +368,8 @@ def visitDomains(domain, depth=1): if not cfg["names_only"]: print("{:35} {:15} {:8} {} {}".format(owner, format_size(num_bytes), - dir_class, timestamp, - display_name)) + dir_class, timestamp, + display_name)) count += 1 if cfg["showacls"]: dumpAcls(d) @@ -386,7 +386,7 @@ def visitDomains(domain, depth=1): cfg["total_size"] = 0 cfg["total_size"] += item["total_size"] else: - num_bytes = " " + num_bytes = " " dir_class = item["class"] if item["lastModified"] is None: timestamp = "" @@ -398,8 +398,8 @@ def visitDomains(domain, depth=1): print(full_path) else: print("{:35} {:15} {:8} {} {}".format(owner, format_size(num_bytes), - dir_class, timestamp, - full_path)) + dir_class, timestamp, + full_path)) if cfg["showacls"]: if dir_class == "folder": with getFolder(domain + '/' + name + '/') as f: @@ -432,7 +432,7 @@ def checkDomain(path): """ Convenience method to specify a domain + h5path as a single string. Walk up the path items, as soon as the parent is a domain or folder return it. Supply the other part as h5path. 
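For example (illustrative path): /home/myfolder/myfile.h5/g1/dset1 would be split into the domain /home/myfolder/myfile.h5 and the h5path g1/dset1.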
""" - + path_names = path.split("/") h5path = "" while path_names: @@ -461,12 +461,12 @@ def usage(): print(" domain: HSDS domain (absolute path with or without 'hdf5:// prefix)") print(" folder: HSDS folder (path as above ending in '/')") print("") - + print("Options:") for name in option_names: help_msg = cfg.get_help_message(name) if help_msg: - print(f" {help_msg}") + print(f" {help_msg}") print("") print(f"example: {cmd} -r -e http://hsdshdflab.hdfgroup.org /shared/tall.h5") print("") @@ -484,14 +484,18 @@ def main(): # additional options cfg.setitem("showacls", False, flags=["--showacls",], help="display domain ACLs") cfg.setitem("showattrs", False, flags=["--showattrs",], help="display domain attributes") - cfg.setitem("pattern", None, flags=["--pattern",], choices=["REGEX",], help="list domains that match the given regex") - cfg.setitem("query", None, flags=["--query",], choices=["QUERY",], help="list domains where the attributes of the root group match the given query string") + cfg.setitem("pattern", None, flags=["--pattern",], choices=["REGEX",], + help="list domains that match the given regex") + cfg.setitem("query", None, flags=["--query",], choices=["QUERY",], + help="list domains where the attributes of the root group match the given query string") cfg.setitem("recursive", False, flags=["-r", "--recursive"], help="recursively list sub-folders or sub-groups") cfg.setitem("dataset_path", None, flags=["-d", "--dataset"], choices=["H5PATH",], help="display specified dataset") cfg.setitem("group_path", None, flags=["-g", "--group"], choices=["H5PATH",], help="display specified group") - cfg.setitem("datatype_path", None, flags=["-t", "--datatype"], choices=["H5PATH",], help="display specified datatype") + cfg.setitem("datatype_path", None, flags=["-t", "--datatype"], choices=["H5PATH",], + help="display specified datatype") cfg.setitem("names_only", False, flags=["-n", "--names"], help="list just folder names or link titles") - cfg.setitem("human_readable", False, flags=["-H", "--human-readable"], help="with -v, print human readable sizes (e.g. 123M)") + cfg.setitem("human_readable", False, flags=["-H", "--human-readable"], + help="with -v, print human readable sizes (e.g. 
123M)") cfg.setitem("help", False, flags=["-h", "--help"], help="this message") try: @@ -521,13 +525,13 @@ def main(): if domain.endswith('/'): # given a folder path count = visitDomains(domain, depth=depth) - if not cfg["names_only"]: + if not cfg["names_only"]: print(f"{count} items") else: res = checkDomain(domain) if res is None: - # couldn't find a domain, call getFile anyway so we can + # couldn't find a domain, call getFile anyway so we can # report on exactly what went wrong pass else: @@ -612,7 +616,7 @@ def main(): if h5path[-1] != "/": h5path = h5path + "/" h5path = h5path + dataset_path - print("using h5path:", h5path) + print("using h5path:", h5path) if h5path not in grp: print("dataset path: {h5path} not found") continue @@ -636,7 +640,7 @@ def main(): if h5path[-1] != "/": h5path = h5path + "/" h5path = h5path + datatype_path - print("using h5path:", h5path) + print("using h5path:", h5path) if h5path not in grp: print("datatype path: {h5path} not found") continue diff --git a/h5pyd/_apps/hsmv.py b/h5pyd/_apps/hsmv.py index 3713327a..d0638119 100755 --- a/h5pyd/_apps/hsmv.py +++ b/h5pyd/_apps/hsmv.py @@ -22,7 +22,8 @@ cfg = Config() -#---------------------------------------------------------------------------------- + +# ---------------------------------------------------------------------------------- def usage(): option_names = cfg.get_names() cmd = cfg.get_cmd() @@ -40,7 +41,7 @@ def usage(): for name in option_names: help_msg = cfg.get_help_message(name) if help_msg: - print(f" {help_msg}") + print(f" {help_msg}") print("") print("Examples:") print(f" {cmd} /home/myfolder/file1.h5 /home/myfolder/file2.h5") @@ -49,25 +50,26 @@ def usage(): print(cfg.get_see_also(cmd)) print("") sys.exit(-1) -#end print_usage +# end print_usage + -#---------------------------------------------------------------------------------- +# ---------------------------------------------------------------------------------- def print_config_example(): print("# default") print("hs_username = ") print("hs_password = ") print("hs_endpoint = http://hsdshdflab.hdfgroup.org") -#print_config_example +# print_config_example -#---------------------------------------------------------------------------------- +# ---------------------------------------------------------------------------------- def getFolder(domain, mode="r"): username = cfg["hs_username"] password = cfg["hs_password"] endpoint = cfg["hs_endpoint"] bucket = cfg["hs_bucket"] dir = h5pyd.Folder(domain, endpoint=endpoint, username=username, - password=password, bucket=bucket, mode=mode) + password=password, bucket=bucket, mode=mode) return dir @@ -77,23 +79,25 @@ def getFile(domain, mode="r"): endpoint = cfg["hs_endpoint"] bucket = cfg["hs_bucket"] fh = h5pyd.File(domain, mode=mode, endpoint=endpoint, username=username, - password=password, bucket=bucket, use_cache=True) + password=password, bucket=bucket, use_cache=True) return fh + def createFile(domain, linked_domain=None, no_clobber=False): - #print("createFile", domain) + # print("createFile", domain) username = cfg["hs_username"] password = cfg["hs_password"] endpoint = cfg["hs_endpoint"] bucket = cfg["hs_bucket"] owner = None if no_clobber: - mode= "x" + mode = "x" else: - mode="w" + mode = "w" if "hs_owner" in cfg: - owner=cfg["hs_owner"] - fh = h5pyd.File(domain, mode=mode, endpoint=endpoint, username=username, password=password, bucket=bucket, owner=owner, linked_domain=linked_domain) + owner = cfg["hs_owner"] + fh = h5pyd.File(domain, mode=mode, endpoint=endpoint, username=username, + 
password=password, bucket=bucket, owner=owner, linked_domain=linked_domain) return fh @@ -137,10 +141,13 @@ def deleteDomain(domain, keep_root=False): else: print(f"Domain: {domain} deleted") + def main(): - cfg.setitem("no_clobber", False, flags=["-n", "--no-clobber"], help="do not overwrite any domains") - cfg.setitem("hs_owner", None, flags=["-o", "--owner"], choices=["OWNER",], help="set owner (must be run as an admin user)") + cfg.setitem("no_clobber", False, flags=["-n", "--no-clobber"], + help="do not overwrite any domains") + cfg.setitem("hs_owner", None, flags=["-o", "--owner"], choices=["OWNER",], + help="set owner (must be run as an admin user)") cfg.setitem("help", False, flags=["-h", "--help"], help="this message") try: @@ -208,6 +215,7 @@ def main(): logging.error(msg) sys.exit(str(oe)) + # __main__ if __name__ == "__main__": main() diff --git a/h5pyd/_apps/hsstat.py b/h5pyd/_apps/hsstat.py index 59c2187e..2148c4c2 100644 --- a/h5pyd/_apps/hsstat.py +++ b/h5pyd/_apps/hsstat.py @@ -26,6 +26,7 @@ cfg = Config() + # # log error and abort app # @@ -37,6 +38,7 @@ def abort(msg): logging.error("exiting program with return code -1") sys.exit(-1) + # # Usage # @@ -52,12 +54,12 @@ def usage(): print(" domain: HSDS domain (absolute path with or without 'hdf5:// prefix)") print(" folder: HSDS folder (path as above ending in '/')") print("") - + print("Options:") for name in option_names: help_msg = cfg.get_help_message(name) if help_msg: - print(f" {help_msg}") + print(f" {help_msg}") print("") print("examples:") print(f" {cmd} -e http://hsdshdflab.hdfgroup.org") @@ -174,13 +176,16 @@ def getDomainInfo(domain, cfg): print(f" linked chunks: {f.num_linked_chunks}") f.close() + + # # Main # def main(): domains = [] - cfg.setitem("human_readable", False, flags=["-H", "--human-readable"], help="print human readable sizes (e.g. 123M)") + cfg.setitem("human_readable", False, flags=["-H", "--human-readable"], + help="print human readable sizes (e.g. 
123M)") cfg.setitem("rescan", False, flags=["--rescan",], help="refresh domain stats (for use when domain is provided)") cfg.setitem("help", False, flags=["-h", "--help"], help="this message") diff --git a/h5pyd/_apps/hstouch.py b/h5pyd/_apps/hstouch.py index b7497f8c..3c7cdd64 100644 --- a/h5pyd/_apps/hstouch.py +++ b/h5pyd/_apps/hstouch.py @@ -13,6 +13,7 @@ cfg = Config() + def getFolder(domain): username = cfg["hs_username"] password = cfg["hs_password"] @@ -22,6 +23,7 @@ def getFolder(domain): dir = h5py.Folder(domain, endpoint=endpoint, username=username, password=password, bucket=bucket) return dir + def createFolder(domain): username = cfg["hs_username"] password = cfg["hs_password"] @@ -29,11 +31,13 @@ def createFolder(domain): bucket = cfg["hs_bucket"] owner = None if "hs_owner" in cfg: - owner=cfg["hs_owner"] + owner = cfg["hs_owner"] logging.debug(f"createFolder({domain})") - dir = h5py.Folder(domain, mode='x', endpoint=endpoint, username=username, password=password, bucket=bucket, owner=owner) + dir = h5py.Folder(domain, mode='x', endpoint=endpoint, username=username, + password=password, bucket=bucket, owner=owner) return dir + def getFile(domain, mode="a"): username = cfg["hs_username"] password = cfg["hs_password"] @@ -43,6 +47,7 @@ def getFile(domain, mode="a"): fh = h5py.File(domain, mode=mode, endpoint=endpoint, username=username, password=password, bucket=bucket) return fh + def createFile(domain): username = cfg["hs_username"] password = cfg["hs_password"] @@ -50,11 +55,13 @@ def createFile(domain): bucket = cfg["hs_bucket"] owner = None if "hs_owner" in cfg: - owner=cfg["hs_owner"] + owner = cfg["hs_owner"] logging.debug(f"createFile({domain})") - fh = h5py.File(domain, mode='x', endpoint=endpoint, username=username, password=password, bucket=bucket, owner=owner) + fh = h5py.File(domain, mode='x', endpoint=endpoint, username=username, + password=password, bucket=bucket, owner=owner) return fh + def getParentDomain(domain): if domain[-1] == '/': if len(domain) > 1: @@ -64,6 +71,7 @@ def getParentDomain(domain): parent_domain += "/" return parent_domain + def touchDomain(domain): # get handle to parent folder parent_domain = getParentDomain(domain) @@ -78,7 +86,7 @@ def touchDomain(domain): try: getFolder(parent_domain) except IOError as oe: - #print("errno:", oe.errno) + # print("errno:", oe.errno) if oe.errno in (404, 410): # Not Found sys.exit(f"Parent domain: {parent_domain} not found") elif oe.errno == 401: # Unauthorized @@ -135,6 +143,7 @@ def touchDomain(domain): except IOError as oe: sys.exit(f"Got error updating domain: {oe}") + # # Usage # @@ -154,20 +163,22 @@ def usage(): for name in option_names: help_msg = cfg.get_help_message(name) if help_msg: - print(f" {help_msg}") + print(f" {help_msg}") print("") print(f"Example: {cmd} hdf5://home/myfolder/emptydomain.h5") print(cfg.get_see_also(cmd)) print("") sys.exit() + # # Main # def main(): domains = [] # additional options - cfg.setitem("hs_owner", None, flags=["-o", "--owner"], choices=["OWNER",], help="set owner (must be run as an admin user)") + cfg.setitem("hs_owner", None, flags=["-o", "--owner"], choices=["OWNER",], + help="set owner (must be run as an admin user)") cfg.setitem("help", False, flags=["-h", "--help"], help="this message") try: @@ -189,5 +200,6 @@ def main(): for domain in domains: touchDomain(domain) + if __name__ == "__main__": main() diff --git a/h5pyd/_apps/utillib.py b/h5pyd/_apps/utillib.py index 860ce25e..eb7a2ddc 100755 --- a/h5pyd/_apps/utillib.py +++ b/h5pyd/_apps/utillib.py @@ 
-22,22 +22,25 @@ sys.exit(1) # copy rather than link for any datasets with product of extents less than the following -MIN_DSET_ELEMENTS_FOR_LINKING=512 +MIN_DSET_ELEMENTS_FOR_LINKING = 512 # adjust chunk shape to fit between min and max chunk sizes when possible MIN_CHUNK_SIZE = 1 * 1024 * 1024 MAC_CHUNK_SIZE = 8 * 1024 * 1024 -H5Z_FILTER_MAP = { 32001: "blosclz", - 32004: "lz4", - 32008: "bitshuffle", - 32015: "zstd", +H5Z_FILTER_MAP = { + 32001: "blosclz", + 32004: "lz4", + 32008: "bitshuffle", + 32015: "zstd", } # check if hdf5 library version supports chunk iteration -hdf_library_version = h5py.version.hdf5_version_tuple -library_has_chunk_iter = (hdf_library_version >= (1, 14, 0) or (hdf_library_version < (1, 12, 0) and (hdf_library_version >= (1, 10, 10)))) +hdf_library_version = h5py.version.hdf5_version_tuple +library_has_chunk_iter = (hdf_library_version >= (1, 14, 0) or ( + hdf_library_version < (1, 12, 0) and (hdf_library_version >= (1, 10, 10)))) + def dump_dtype(dt): if not isinstance(dt, np.dtype): @@ -113,6 +116,7 @@ def has_reference(dtype): has_ref = has_reference(basedt) return has_ref + def get_reftype(obj): if is_h5py(obj): ref_type = h5py. special_dtype(ref=h5py.Reference) @@ -238,7 +242,7 @@ def copy_element(val, src_dt, tgt_dt, ctx): else: out = "" # h5pyd refs are strings - if ref and val: + if ref and val: try: fin_obj = fin[val] except AttributeError as ae: @@ -262,7 +266,6 @@ def copy_element(val, src_dt, tgt_dt, ctx): # convert to string for JSON serialization out = str(fout_obj.ref) - elif is_regionreference(ref): out = "tbd" else: @@ -336,7 +339,7 @@ def copy_attribute(desobj, name, srcobj, ctx): tgtarr = None data = srcobj.attrs[name] src_dt = None - + # check for non-numpy types that might get returned if is_regionreference(data): msg = "regionreference types not supported, " @@ -368,7 +371,7 @@ def copy_attribute(desobj, name, srcobj, ctx): # convert to numpy type data = np.asarray(data) src_dt = data.dtype - + if src_dt.kind == "S" and isinstance(data, bytes): # check that this is actually utf-encodable try: @@ -392,7 +395,6 @@ def copy_attribute(desobj, name, srcobj, ctx): else: des_empty = h5pyd.Empty - if isinstance(data, src_empty): # create Empty object with tgt dtype tgt_dt = convert_dtype(src_dt, ctx) @@ -410,6 +412,7 @@ def copy_attribute(desobj, name, srcobj, ctx): if not ctx["ignore_error"]: raise IOError(msg) + # "safe" resize method where new extent can be <= existing extent def resize_dataset(dset, extent, axis=0): logging.debug(f"resize_dataset {dset} to {extent}") @@ -420,6 +423,7 @@ def resize_dataset(dset, extent, axis=0): if dset.shape[axis] < extent: raise + # ---------------------------------------------------------------------------------- def get_chunk_layout(dset): if is_h5py(dset): @@ -434,7 +438,6 @@ def get_chunk_layout(dset): layout = dset_json["layout"] logging.debug(f"got chunk layout for dset id: {dset.id.id}: {layout}") return layout - # ---------------------------------------------------------------------------------- @@ -461,6 +464,7 @@ def get_chunk_dims(dset): chunk_dims = None return chunk_dims + # ---------------------------------------------------------------------------------- def get_chunktable_dims(dset): rank = len(dset.shape) @@ -478,6 +482,7 @@ def get_chunktable_dims(dset): table_dims = tuple(table_dims) return table_dims + # ---------------------------------------------------------------------------------- def get_num_chunks(dset): if dset.shape is None: @@ -508,6 +513,7 @@ def get_num_chunks(dset): num_chunks 
= 1 return num_chunks + # ---------------------------------------------------------------------------------- def get_dset_offset(dset): """ Return dataset file offset for HDF5 contiguous datasets """ @@ -519,7 +525,8 @@ def get_dset_offset(dset): if offset is None: return -1 return offset - + + # ---------------------------------------------------------------------------------- def get_chunktable_dtype(include_file_uri=False): if include_file_uri: @@ -529,6 +536,7 @@ def get_chunktable_dtype(include_file_uri=False): dt = np.dtype([("offset", np.int64), ("size", np.int32)]) return dt + # ---------------------------------------------------------------------------------- def get_chunk_table_index(chunk_offset, chunk_dims): if len(chunk_offset) != len(chunk_dims): @@ -537,9 +545,10 @@ def get_chunk_table_index(chunk_offset, chunk_dims): rank = len(chunk_offset) chunk_index = [] for i in range(rank): - chunk_index.append(chunk_offset[i]//chunk_dims[i]) + chunk_index.append(chunk_offset[i] // chunk_dims[i]) return tuple(chunk_index) + # ---------------------------------------------------------------------------------- def get_chunk_locations(dset, ctx, include_file_uri=False): if not is_h5py(dset): @@ -549,7 +558,7 @@ def get_chunk_locations(dset, ctx, include_file_uri=False): return None else: raise IOError(msg) - + if dset.chunks is None: msg = "get_chunklocations - dataset is not chunked" logging.error(msg) @@ -557,7 +566,7 @@ def get_chunk_locations(dset, ctx, include_file_uri=False): return None else: raise IOError(msg) - + rank = len(dset.shape) spaceid = dset.id.get_space() @@ -601,18 +610,18 @@ def get_chunk_locations(dset, ctx, include_file_uri=False): e = (chunk_offset, chunk_size, s3path) else: e = (chunk_offset, chunk_size) - + chunk_arr[...] = e # return one-element array return chunk_arr - + # get chunk locations for non-contiguous datasets chunk_dims = get_chunk_dims(dset) if library_has_chunk_iter: def init_chunktable_callback(chunk_info): - # Use chunk offset as index + # Use chunk offset as index index = get_chunk_table_index(chunk_info[0], chunk_dims) byte_offset = chunk_info[2] chunk_size = chunk_info[3] @@ -621,20 +630,19 @@ def init_chunktable_callback(chunk_info): msg = f"Unexpected array_offset: {index} for dataset with rank: {rank}" logging.error(msg) raise IOError(msg) - + if include_file_uri: e = (byte_offset, chunk_size, s3path) else: e = (byte_offset, chunk_size) chunk_arr[index] = e - + dset.id.chunk_iter(init_chunktable_callback) - else: + else: # Using old HDF5 version without H5Dchunk_iter num_chunks = get_num_chunks(dset) - for i in range(num_chunks): chunk_info = dset.id.get_chunk_info(i, spaceid) index = get_chunk_table_index(chunk_info[0], chunk_dims) @@ -645,19 +653,20 @@ def init_chunktable_callback(chunk_info): msg = f"Unexpected array_offset: {index} for dataset with rank: {rank}" logging.error(msg) raise IOError(msg) - + if include_file_uri: e = (byte_offset, chunk_size, s3path) else: e = (byte_offset, chunk_size) - + chunk_arr[index] = e - + if i % 5000 == 0: logging.info(f"{i} chunks indexed") return chunk_arr + # ---------------------------------------------------------------------------------- def create_chunktable(dset, dset_dims, ctx): logging.debug(f"create_chunktable({dset}, {dset_dims}") @@ -679,7 +688,7 @@ def create_chunktable(dset, dset_dims, ctx): dt = get_chunktable_dtype(include_file_uri=include_file_uri) chunktable_dims = [0,] if extend else [] - + chunktable_dims.extend(get_chunktable_dims(dset)) chunktable_dims = tuple(chunktable_dims) 
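+ # illustrative example (editorial addition): a dataset of shape (1000, 275) with + # chunk shape (100, 275) gives chunktable_dims of (10, 1) -- one table entry per chunk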
logging.debug(f"chunktable_dims: {chunktable_dims}") @@ -707,12 +716,12 @@ def create_chunktable(dset, dset_dims, ctx): logging.error(msg) raise ValueError(msg) bucket = linkpath[:index] - filepath = linkpath[(index+1):] + filepath = linkpath[(index + 1):] initializer_opts.append(f"--filepath={filepath}") initializer_opts.append(f"--bucket={bucket}") logging.info(f"using initializer: {initializer_opts}") kwargs["initializer_opts"] = initializer_opts - + anon_dset = fout.create_dataset(None, **kwargs) msg = f"created chunk table: {anon_dset}" logging.info(msg) @@ -762,7 +771,7 @@ def create_chunktable(dset, dset_dims, ctx): continue else: raise IOError(msg) - + chunk_key = "" for dim in range(rank): chunk_key += str(index[dim] // chunk_dims[dim]) @@ -835,7 +844,7 @@ def update_chunktable(src, tgt, ctx): if not ctx["ignore_error"]: raise IOError(msg) return - + layout = get_chunk_layout(src) layout_class = layout["class"] if layout_class == "H5D_CONTIGUOUS_REF": @@ -857,7 +866,7 @@ def update_chunktable(src, tgt, ctx): index = tuple(index) chunk_arr[index] = v elif layout_class == "H5D_CHUNKED_REF_INDIRECT": - file_uri = layout["file_uri"] + file_uri = layout["file_uri"] orig_chunktable_id = layout["chunk_table"] orig_chunktable = fout[f"datasets/{orig_chunktable_id}"] # iterate through contents and add file uri @@ -881,7 +890,7 @@ def update_chunktable(src, tgt, ctx): if not ctx["ignore_error"]: raise IOError(msg) return - + if len(tgt.shape) > len(src.shape): # append mode, extend the first dimension of table by one extent = chunktable.shape[0] + 1 @@ -890,13 +899,14 @@ def update_chunktable(src, tgt, ctx): else: chunktable[...] = chunk_arr -#---------------------------------------------------------------------------------- + +# ---------------------------------------------------------------------------------- def expandChunk(chunk_shape, max_shape, typesize): """Extend the chunk shape until it is above the MIN target.""" if chunk_shape is None: return None - + logging.debug(f"orig chunk_shape: {chunk_shape}") rank = len(chunk_shape) @@ -907,7 +917,7 @@ def expandChunk(chunk_shape, max_shape, typesize): if rank == 0: # scalar - can't be expanded return tuple(chunk_shape) - + chunk_shape = list(chunk_shape).copy() while True: @@ -923,7 +933,7 @@ def expandChunk(chunk_shape, max_shape, typesize): for i in range(rank): # start from the low-order dimension - dim = rank - i - 1 + dim = rank - i - 1 nextent = chunk_shape[dim] if nextent * 2 <= max_shape[dim]: chunk_shape[dim] = nextent * 2 @@ -974,7 +984,7 @@ def create_dataset(dobj, ctx): if len(dset.shape) == len(dobj.shape): if dset.shape != dobj.shape: msg = f"unable to append {dobj.name}: shape is not compatible" - logging.error(msg) + logging.error(msg) if ctx["verbose"]: print(msg) return None @@ -1067,14 +1077,13 @@ def create_dataset(dobj, ctx): kwargs = {"shape": tgt_shape, "maxshape": tgt_maxshape, "dtype": tgt_dtype} - if ( - ctx["dataload"] in ("link", "fastlink") - and not is_vlen(dobj.dtype) - and dobj.shape is not None - and len(dobj.shape) > 0 - and not is_compact(dobj) - and np.prod(dobj.shape) > MIN_DSET_ELEMENTS_FOR_LINKING - ): + if (ctx["dataload"] in ("link", "fastlink") and ( + not is_vlen(dobj.dtype)) and ( + dobj.shape is not None) and ( + len(dobj.shape) > 0) and ( + not is_compact(dobj)) and ( + np.prod(dobj.shape) > MIN_DSET_ELEMENTS_FOR_LINKING)): + chunks = create_chunktable(dobj, tgt_shape, ctx) logging.info(f"using chunk layout: {chunks}") @@ -1083,9 +1092,9 @@ def create_dataset(dobj, ctx): # converting hsds 
dset with linked chunks to h5py dataset # just use the dims field of dobj.chunks as chunk shape chunks = get_chunk_dims(dobj) - + if chunks is not None and not is_h5py(fout): - # expand chunk if too small + # expand chunk if too small if dset_preappend is not None: # check to see if an extra dimension is needed for the chunk shape @@ -1104,7 +1113,7 @@ # currently hyperchunks only supported for 1d datasets chunk_dims = expandChunk(chunk_dims, dobj.shape, dobj.dtype.itemsize) logging.debug(f"expanded chunks: {chunk_dims}") - chunks["dims"] = chunk_dims + chunks["dims"] = chunk_dims else: # contiguous or compact, using dataset shape pass @@ -1114,16 +1123,13 @@ # currently hyperchunks only supported for 1d datasets chunks = expandChunk(chunks, dobj.shape, dobj.dtype.itemsize) logging.debug(f"expanded chunks: {chunks}") - + logging.debug(f"setting chunks kwargs to: {chunks}") - + kwargs["chunks"] = chunks - if ( - dobj.shape is None - or len(dobj.shape) == 0 - or (is_vlen(dobj.dtype) and is_h5py(fout)) - ): + if (dobj.shape is None or (len(dobj.shape) == 0) or ( + is_vlen(dobj.dtype) and is_h5py(fout))): # don't use compression/chunks for scalar datasets # or vlen pass @@ -1139,7 +1145,7 @@ kwargs["compression_opts"] = ctx["default_compression_opts"] if ctx["verbose"]: print("applying default compression filter") - + # TBD: it would be better if HSDS could let us know what filters # are supported (like it does with compressors) # For now, just hard-code fletcher32 and scaleoffset to be ignored @@ -1189,11 +1195,11 @@ logging.debug(f"got fillvalue: {fillvalue}") kwargs["fillvalue"] = fillvalue - # finally, create the dataset + # finally, create the dataset msg = f"creating dataset {dobj.name}, shape: {dobj.shape}, type: {tgt_dtype}" logging.info(msg) if ctx["verbose"]: - print(msg) + print(msg) dset = fout.create_dataset(dobj.name, **kwargs) msg = f"dataset created, uuid: {dset.id.id}, " @@ -1210,13 +1216,13 @@ logging.error(msg) if not ctx["ignore_error"]: raise - if dset_preappend is not None: write_dataset(dset_preappend, dset, ctx) # dset[0, ...] = dset_preappend[...]
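+ # note (editorial): in append mode the pre-append data is copied into the new + # dataset via write_dataset above rather than by the direct assignment commented out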
return dset + # ---------------------------------------------------------------------------------- def write_dataset(src, tgt, ctx): """write values from src dataset to target dataset.""" @@ -1307,7 +1313,7 @@ # don't write chunks, but update chunktable for chunk ref indirect update_chunktable(src, tgt, ctx) else: - pass # skip chunkterator for link option + pass # skip chunk iterator for link option return msg = f"iterating over chunks for {src.name}" @@ -1475,7 +1481,7 @@ def create_group(gobj, ctx): if ctx["verbose"]: print(msg) return - + if ctx["append"]: msg = f"skipping creation of group {gobj.name} since already found" logging.info(msg) diff --git a/h5pyd/_hl/__init__.py b/h5pyd/_hl/__init__.py index efe4200e..af6ddb00 100644 --- a/h5pyd/_hl/__init__.py +++ b/h5pyd/_hl/__init__.py @@ -11,4 +11,3 @@ ############################################################################## from __future__ import absolute_import - diff --git a/h5pyd/_hl/attrs.py b/h5pyd/_hl/attrs.py index f39caa06..d4d64a00 100644 --- a/h5pyd/_hl/attrs.py +++ b/h5pyd/_hl/attrs.py @@ -102,7 +102,7 @@ def _bytesArrayToList(self, data): if is_list: out = [] for item in data: - out.append(self._bytesArrayToList(item)) # recursive call + out.append(self._bytesArrayToList(item)) # recursive call elif isinstance(data, bytes): out = data.decode("utf-8") else: @@ -208,19 +208,19 @@ def create(self, name, data, shape=None, dtype=None): if shape is None and not isinstance(data, Empty): shape = data.shape - use_htype = None # If a committed type is given, we must use it - # in the call to h5a.create. + use_htype = None # If a committed type is given, we must use it in h5a.create. if isinstance(dtype, Datatype): use_htype = dtype.id dtype = dtype.dtype # Special case if data are complex numbers - if (data.dtype.kind == 'c' and - (dtype.names is None or - dtype.names != ('r', 'i') or - any(dt.kind != 'f' for dt, off in dtype.fields.values()) or - dtype.fields['r'][0] == dtype.fields['i'][0])): + is_complex = (data.dtype.kind == 'c') and ( + (dtype.names is None) or ( + dtype.names != ('r', 'i')) or ( + any(dt.kind != 'f' for dt, off in dtype.fields.values())) or ( + dtype.fields['r'][0] == dtype.fields['i'][0])) + + if is_complex: raise TypeError( 'Wrong committed datatype for complex numbers: %s' % dtype.name) @@ -231,7 +231,7 @@ else: dtype = data.dtype else: - dtype = numpy.dtype(dtype) # In case a string, e.g. 'i8' is passed + dtype = numpy.dtype(dtype) # In case a string, e.g. 'i8' is passed # Where a top-level array type is requested, we have to do some # fiddling around to present the data as a smaller array of @@ -259,7 +259,7 @@ data = data.reshape(shape) # We need this to handle special string types.
- + data = numpy.asarray(data, dtype=dtype) # Make HDF5 datatype and dataspace for the H5A calls @@ -376,7 +376,7 @@ def __contains__(self, name): try: self._parent.GET(req) except IOError: - #todo - verify this is a 404 response + # todo - verify this is a 404 response exists = False return exists diff --git a/h5pyd/_hl/base.py b/h5pyd/_hl/base.py index e67b3f7d..3b49b9f0 100644 --- a/h5pyd/_hl/base.py +++ b/h5pyd/_hl/base.py @@ -27,6 +27,7 @@ numpy_integer_types = (np.int8, np.uint8, np.int16, np.int16, np.int32, np.uint32, np.int64, np.uint64) numpy_float_types = (np.float16, np.float32, np.float64) + class FakeLock(): def __init__(self): pass @@ -37,6 +38,7 @@ def __enter__(self): def __exit__(self, a, b, c): pass + _phil = FakeLock() # Python alias for access from other modules @@ -58,6 +60,7 @@ def wrapper(*args, **kwds): functools.update_wrapper(wrapper, func, ('__name__', '__doc__')) return wrapper + def find_item_type(data): """Find the item type of a simple object or collection of objects. @@ -73,8 +76,7 @@ def find_item_type(data): """ if isinstance(data, np.ndarray): if ( - data.dtype.kind == 'O' - and not check_dtype(vlen=data.dtype) + data.dtype.kind == 'O' and not check_dtype(vlen=data.dtype) ): item_types = {type(e) for e in data.flat} else: @@ -104,6 +106,7 @@ def guess_dtype(data): return None + def is_float16_dtype(dt): if dt is None: return False @@ -111,6 +114,7 @@ def is_float16_dtype(dt): dt = np.dtype(dt) # normalize strings -> np.dtype objects return dt.kind == 'f' and dt.itemsize == 2 + def array_for_new_object(data, specified_dtype=None): """Prepare an array from data used to create a new dataset or attribute""" @@ -139,56 +143,58 @@ def array_for_new_object(data, specified_dtype=None): def _decode(item, encoding="ascii"): - """decode any byte items to python 3 strings - """ - ret_val = None - if type(item) is bytes: - ret_val = item.decode(encoding) - elif type(item) is list: - ret_val = [] - for x in item: - ret_val.append(_decode(x, encoding)) - elif type(item) is tuple: - ret_val = [] - for x in item: - ret_val.append(_decode(x, encoding)) - ret_val = tuple(ret_val) - elif type(item) is dict: - ret_val = {} - for k in dict: - ret_val[k] = _decode(item[k], encoding) - elif type(item) is np.ndarray: - x = item.tolist() - ret_val = [] - for x in item: - ret_val.append(_decode(x, encoding)) - elif type(item) in numpy_integer_types: - ret_val = int(item) - elif type(item) in numpy_float_types: - ret_val = float(item) - else: - ret_val = item - return ret_val + """ + decode any byte items to python 3 strings + """ + ret_val = None + if type(item) is bytes: + ret_val = item.decode(encoding) + elif type(item) is list: + ret_val = [] + for x in item: + ret_val.append(_decode(x, encoding)) + elif type(item) is tuple: + ret_val = [] + for x in item: + ret_val.append(_decode(x, encoding)) + ret_val = tuple(ret_val) + elif type(item) is dict: + ret_val = {} + for k in dict: + ret_val[k] = _decode(item[k], encoding) + elif type(item) is np.ndarray: + x = item.tolist() + ret_val = [] + for x in item: + ret_val.append(_decode(x, encoding)) + elif type(item) in numpy_integer_types: + ret_val = int(item) + elif type(item) in numpy_float_types: + ret_val = float(item) + else: + ret_val = item + return ret_val -""" -Convert a list to a tuple, recursively. -Example. [[1,2],[3,4]] -> ((1,2),(3,4)) -""" # TBD: this was cut & pasted from attrs.py def toTuple(rank, data): + """ + Convert a list to a tuple, recursively. + Example. 
[[1,2],[3,4]] -> ((1,2),(3,4)) + """ if type(data) in (list, tuple): if rank > 0: - return list(toTuple(rank-1, x) for x in data) + return list(toTuple(rank - 1, x) for x in data) else: - return tuple(toTuple(rank-1, x) for x in data) + return tuple(toTuple(rank - 1, x) for x in data) else: return data -""" -Helper - get num elements defined by a shape -""" + def getNumElements(dims): + """ + Helper - get num elements defined by a shape + """ num_elements = 0 if isinstance(dims, int): num_elements = dims @@ -200,11 +206,11 @@ def getNumElements(dims): raise ValueError("Unexpected argument") return num_elements -""" -Copy JSON array into given numpy array -""" -def copyToArray(arr, rank, index, data, vlen_base=None): +def copyToArray(arr, rank, index, data, vlen_base=None): + """ + Copy JSON array into given numpy array + """ nlen = arr.shape[rank] if len(data) != nlen: raise ValueError("Array len of {} at index: {} doesn't match data length: {}".format(nlen, index, len(data))) @@ -212,7 +218,7 @@ def copyToArray(arr, rank, index, data, vlen_base=None): index[rank] = i if rank < len(arr.shape) - 1: # recursive call - copyToArray(arr, rank+1, index, data[i], vlen_base=vlen_base) + copyToArray(arr, rank + 1, index, data[i], vlen_base=vlen_base) else: if vlen_base: if vlen_base == str: @@ -236,10 +242,12 @@ def jsonToArray(data_shape, data_dtype, data_json): # gives us a list instead. # Special case: complex numbers - if (data_dtype.names is not None and - data_dtype.names == ('r', 'i') and - all(dt.kind == 'f' for dt, off in data_dtype.fields.values()) and - data_dtype.fields['r'][0] == data_dtype.fields['i'][0]): + is_complex = data_dtype.names is not None and ( + data_dtype.names == ('r', 'i')) and ( + all(dt.kind == 'f' for dt, off in data_dtype.fields.values())) and ( + data_dtype.fields['r'][0] == data_dtype.fields['i'][0]) + + if (is_complex): itemsize = data_dtype.itemsize if itemsize == 16: cmplx_dtype = np.dtype(np.complex128) @@ -284,7 +292,6 @@ def jsonToArray(data_shape, data_dtype, data_json): converted_data = toTuple(np_shape_rank, data_json) data_json = converted_data - arr = np.array(data_json, dtype=data_dtype) # raise an exception of the array shape doesn't match the selection shape # allow if the array is a scalar and the selection shape is one element, @@ -298,10 +305,11 @@ def jsonToArray(data_shape, data_dtype, data_json): return arr -""" -Return True if the type contains variable length elements -""" + def isVlen(dt): + """ + Return True if the type contains variable length elements + """ is_vlen = False if len(dt) > 1: names = dt.names @@ -314,10 +322,11 @@ def isVlen(dt): is_vlen = True return is_vlen -""" -Get number of byte needed for given element as a bytestream -""" + def getElementSize(e, dt): + """ + Get number of byte needed for given element as a bytestream + """ # print(f"getElementSize - e: {e} dt: {dt} itemsize: {dt.itemsize}") if len(dt) > 1: count = 0 @@ -344,7 +353,7 @@ def getElementSize(e, dt): if e.dtype.kind != 'O': count = e.dtype.itemsize * nElements else: - count = nElements * vlen.itemsize # tbd - special case for strings? + count = nElements * vlen.itemsize # tbd - special case for strings? 
count += 4 # byte count elif isinstance(e, list) or isinstance(e, tuple): if not e: @@ -358,10 +367,10 @@ def getElementSize(e, dt): return count -""" -Get number of bytes needed to store given numpy array as a bytestream -""" def getByteArraySize(arr): + """ + Get number of bytes needed to store given numpy array as a bytestream + """ if not isVlen(arr.dtype) and arr.dtype.kind != 'O': print("not isVlen") return arr.itemsize * np.prod(arr.shape) @@ -374,19 +383,21 @@ def getByteArraySize(arr): count += getElementSize(e, dt) return count -""" -Copy to buffer at given offset -""" + def copyBuffer(src, des, offset): + """ + Copy to buffer at given offset + """ for i in range(len(src)): - des[i+offset] = src[i] + des[i + offset] = src[i] return offset + len(src) -""" -Copy element to bytearray -""" + def copyElement(e, dt, buffer, offset, vlen=None): + """ + Copy element to bytearray + """ if vlen is None and dt.metadata and "vlen" in dt.metadata: vlen = dt.metadata["vlen"] if len(dt) > 1: @@ -395,11 +406,11 @@ def copyElement(e, dt, buffer, offset, vlen=None): field_val = e[name] offset = copyElement(field_val, field_dt, buffer, offset) elif not vlen: - #print("e no vlen: {} type: {}".format(e, type(e))) + # print("e no vlen: {} type: {}".format(e, type(e))) e_buf = e.tobytes() if len(e_buf) < dt.itemsize: # extend the buffer for fixed size strings - #print("extending buffer to {}".format(dt.itemsize)) + # print("extending buffer to {}".format(dt.itemsize)) e_buf_ex = bytearray(dt.itemsize) for i in range(len(e_buf)): e_buf_ex[i] = e_buf[i] @@ -440,7 +451,7 @@ def copyElement(e, dt, buffer, offset, vlen=None): arr = np.asarray(arr1d, dtype=vlen) offset = copyBuffer(arr.tobytes(), buffer, offset) - #for item in arr1d: + # for item in arr1d: # offset = copyElement(item, dt, buffer, offset) elif isinstance(e, list) or isinstance(e, tuple): @@ -454,17 +465,18 @@ def copyElement(e, dt, buffer, offset, vlen=None): else: raise TypeError("unexpected type: {}".format(type(e))) - #print("buffer: {}".format(buffer)) + # print("buffer: {}".format(buffer)) return offset -""" -Get the count value from persisted vlen array -""" + def getElementCount(buffer, offset): - count_bytes = bytes(buffer[offset:(offset+4)]) + """ + Get the count value from persisted vlen array + """ + count_bytes = bytes(buffer[offset:(offset + 4)]) try: - arr =np.frombuffer(count_bytes, dtype=" 1024*1024*1024: + if count > 1024 * 1024 * 1024: # expect variable length element to be between 0 and 1mb raise ValueError("Variable length element size expected to be less than 1MB") return count -""" -Read element from bytearrray -""" def readElement(buffer, offset, arr, index, dt): + """ + Read element from bytearrray + """ # print(f"readElement, offset: {offset}, index: {index} dt: {dt}") if len(dt) > 1: @@ -491,7 +503,7 @@ def readElement(buffer, offset, arr, index, dt): offset = readElement(buffer, offset, e, name, field_dt) elif not dt.metadata or "vlen" not in dt.metadata: count = dt.itemsize - e_buffer = buffer[offset:(offset+count)] + e_buffer = buffer[offset:(offset + count)] offset += count try: e = np.frombuffer(bytes(e_buffer), dtype=dt) @@ -515,7 +527,7 @@ def readElement(buffer, offset, arr, index, dt): offset += 4 if count < 0: raise ValueError("Unexpected variable length data format") - e_buffer = buffer[offset:(offset+count)] + e_buffer = buffer[offset:(offset + count)] offset += count if vlen in (bytes, str): @@ -526,15 +538,15 @@ def readElement(buffer, offset, arr, index, dt): except ValueError: print("ValueError -- 
e_buffer:", e_buffer, "dtype:", vlen) raise - arr[index] = e + arr[index] = e return offset -""" -Return byte representation of numpy array -""" def arrayToBytes(arr, vlen=None): + """ + Return byte representation of numpy array + """ if not isVlen(arr.dtype) and vlen is None: # can just return normal numpy bytestream return arr.tobytes() @@ -548,12 +560,13 @@ def arrayToBytes(arr, vlen=None): offset = copyElement(e, arr1d.dtype, buffer, offset, vlen=vlen) return buffer -""" -Create numpy array based on byte representation -""" + def bytesToArray(data, dt, shape): + """ + Create numpy array based on byte representation + """ nelements = getNumElements(shape) - + if not isVlen(dt): # regular numpy from string arr = np.frombuffer(data, dtype=dt) @@ -562,7 +575,7 @@ def bytesToArray(data, dt, shape): offset = 0 for index in range(nelements): offset = readElement(data, offset, arr, index, dt) - + if shape is not None: if shape == () and dt.shape: # special case for scalar array with array sub-type @@ -572,7 +585,6 @@ def bytesToArray(data, dt, shape): return arr - class LinkCreationPropertyList(object): """ Represents a LinkCreationPropertyList @@ -595,7 +607,6 @@ def char_encoding(self): return self._char_encoding - class LinkAccessPropertyList(object): """ Represents a LinkAccessPropertyList @@ -605,21 +616,23 @@ class LinkAccessPropertyList(object): def __repr__(self): return "" + def default_lcpl(): """ Default link creation property list """ lcpl = LinkCreationPropertyList() return lcpl + def default_lapl(): """ Default link access property list """ lapl = LinkAccessPropertyList() return lapl + dlapl = default_lapl() dlcpl = default_lcpl() - class CommonStateObject(object): """ @@ -630,7 +643,6 @@ class CommonStateObject(object): Also implements Unicode operations. """ - @property def _lapl(self): """ Fetch the link access property list appropriate for this object @@ -675,8 +687,6 @@ def get_lcpl(coding): return name, get_lcpl(coding) return name - - def _d(self, name): """ Decode a name according to the current file settings. @@ -722,11 +732,9 @@ def __getitem__(self, args): pass # bases classes will override - def shape(self, ref): pass - def selection(self, ref): """ Get the shape of the target dataspace selection referred to by *ref* """ @@ -755,7 +763,6 @@ def read(self): def update(self): return self._update - @property def readACL(self): return self._readACL @@ -764,8 +771,6 @@ def readACL(self): def updateACL(self): return self._updateACL - - """ Proxy object which handles ACLs (access control list) @@ -859,7 +864,6 @@ def _getNameFromObjDb(self): self.log.debug("_getNameFromObjDb - could not find path") return None - @property def name(self): """ Return the full name of this object. None if anonymous. 
""" @@ -867,7 +871,7 @@ def name(self): obj_name = self._name except AttributeError: # name hasn't been assigned yet - obj_name = self._getNameFromObjDb() # pull from the objdb if present + obj_name = self._getNameFromObjDb() # pull from the objdb if present if obj_name: self._name = obj_name # save this if not obj_name: @@ -881,7 +885,7 @@ def name(self): elif self._id.id.startswith("t-"): req = "/datatypes/" + self._id if req: - params=params = {"getalias": 1} + params = params = {"getalias": 1} self.log.info("sending get alias request for id: {}".format(self._id.id)) obj_json = self.GET(req, params, use_cache=False) if "alias" in obj_json: @@ -890,7 +894,6 @@ def name(self): obj_name = alias[0] self._name = obj_name - return obj_name # return self._d(h5i.get_name(self.id)) @@ -958,7 +961,7 @@ def GET(self, req, params=None, use_cache=True, format="json"): raise IOError("object not initialized") # This should be the default - but explictly set anyway headers = {"Accept-Encoding": "deflate, gzip"} - + rsp = self.id._http_conn.GET(req, params=params, headers=headers, format=format, use_cache=use_cache) if rsp.status_code != 200: self.log.info(f"Got response: {rsp.status_code}") @@ -969,7 +972,7 @@ def GET(self, req, params=None, use_cache=True, format="json"): self.log.debug("returning binary content, length: " + rsp.headers['Content-Length']) else: self.log.debug("returning binary content - length unknown") - HTTP_CHUNK_SIZE=4096 + HTTP_CHUNK_SIZE = 4096 http_chunks = [] downloaded_bytes = 0 for http_chunk in rsp.iter_content(chunk_size=HTTP_CHUNK_SIZE): @@ -989,7 +992,7 @@ def GET(self, req, params=None, use_cache=True, format="json"): rsp_content = bytearray(downloaded_bytes) index = 0 for http_chunk in http_chunks: - rsp_content[index:(index+len(http_chunk))] = http_chunk + rsp_content[index:(index + len(http_chunk))] = http_chunk index += len(http_chunk) return rsp_content else: @@ -998,7 +1001,6 @@ def GET(self, req, params=None, use_cache=True, format="json"): self.log.debug(f"rsp_json - {len(rsp.text)} bytes") return rsp_json - def PUT(self, req, body=None, params=None, format="json", replace=False): if self.id.http_conn is None: raise IOError("object not initialized") @@ -1070,9 +1072,9 @@ def __init__(self, oid, file=None): """ Setup this object, given its low-level identifier """ self._id = oid self.log = self._id.http_conn.logging - self.req_prefix = None # derived class should set this to the URI of the object + self.req_prefix = None # derived class should set this to the URI of the object self._file = file - #self._name = None + # self._name = None if not self.log.handlers: # setup logging @@ -1209,6 +1211,7 @@ class MutableMappingHDF5(MappingHDF5, MutableMapping): pass + class Empty(object): """ diff --git a/h5pyd/_hl/config.py b/h5pyd/_hl/config.py index e6cc5115..6fe9f220 100755 --- a/h5pyd/_hl/config.py +++ b/h5pyd/_hl/config.py @@ -12,6 +12,7 @@ import os import json + class Config: """ User Config state @@ -41,7 +42,7 @@ def __init__(self, config_file=None, **kwargs): print("config file: {} line: {} is not valid".format(self._config_file, line_number)) continue k = line[:index].strip() - v = line[(index+1):].strip() + v = line[(index + 1):].strip() if v and v.upper() != "NONE": self._cfg[k] = v # override any config values with environment variable if found @@ -95,4 +96,3 @@ def get(self, name, default=None): return self[name] else: return default - diff --git a/h5pyd/_hl/dataset.py b/h5pyd/_hl/dataset.py index 332a80fb..97439bb9 100644 --- a/h5pyd/_hl/dataset.py +++ 
b/h5pyd/_hl/dataset.py @@ -39,12 +39,10 @@ def readtime_dtype(basetype, names): """Make a NumPy dtype appropriate for reading""" # Check if basetype is the special case for storing complex numbers - if ( - basetype.names is not None - and basetype.names == ("r", "i") - and all(dt.kind == "f" for dt, off in basetype.fields.values()) - and basetype.fields["r"][0] == basetype.fields["i"][0] - ): + is_complex_basetype = basetype.names is not None and basetype.names == ("r", "i") + is_complex_basetype = is_complex_basetype and all(dt.kind == "f" for dt, off in basetype.fields.values()) + is_complex_basetype = is_complex_basetype and basetype.fields["r"][0] == basetype.fields["i"][0] + if is_complex_basetype: itemsize = basetype.itemsize if itemsize == 16: return numpy.dtype(numpy.complex128) @@ -107,8 +105,7 @@ def make_new_dset( else: shape = (shape,) if isinstance(shape, int) else tuple(shape) if data is not None and ( - numpy.prod(shape, dtype=numpy.ulonglong) - != numpy.prod(data.shape, dtype=numpy.ulonglong) + numpy.prod(shape, dtype=numpy.ulonglong) != numpy.prod(data.shape, dtype=numpy.ulonglong) ): raise ValueError("Shape tuple is incompatible with data") @@ -773,8 +770,7 @@ def __init__(self, bind): def _getVerboseInfo(self): now = time.time() if ( - self._verboseUpdated is None - or now - self._verboseUpdated > VERBOSE_REFRESH_TIME + self._verboseUpdated is None or now - self._verboseUpdated > VERBOSE_REFRESH_TIME ): # resynch the verbose data req = "/datasets/" + self.id.uuid + "?verbose=1" @@ -874,7 +870,7 @@ def __iter__(self): if shape[0] - i < numrows: numrows = shape[0] - i self.log.debug("get {} iter items".format(numrows)) - arr = self[i : numrows + i] + arr = self[i: numrows + i] yield arr[i % BUFFER_SIZE] @@ -1133,7 +1129,8 @@ def __getitem__(self, args, new_dtype=None): des_index = 0 # this is where we'll copy to the arr for each page - self.log.debug(f"paged read, chunks_per_page: {chunks_per_page} max_chunks: {max_chunks}, num_pages: {num_pages}") + self.log.debug(f"paged read, chunks_per_page: {chunks_per_page}\ + max_chunks: {max_chunks}, num_pages: {num_pages}") for page_number in range(num_pages): self.log.debug(f"page_number: {page_number}") @@ -1153,12 +1150,13 @@ def __getitem__(self, args, new_dtype=None): self.log.info(f"page_stop: {page_stop[split_dim]}") page_mshape = list(copy(mshape)) - page_mshape[mshape_split_dim] = (1 + (page_stop[split_dim] - page_start[split_dim] - 1) // sel_step[split_dim]) + page_mshape[mshape_split_dim] =\ + (1 + (page_stop[split_dim] - page_start[split_dim] - 1) // sel_step[split_dim]) page_mshape = tuple(page_mshape) self.log.info(f"page_mshape: {page_mshape}") - params["select"] = self._getQueryParam( page_start, page_stop, sel_step) + params["select"] = self._getQueryParam(page_start, page_stop, sel_step) try: rsp = self.GET(req, params=params, format="binary") except IOError as ioe: @@ -1378,7 +1376,7 @@ def __setitem__(self, args, val): raise TypeError("Unable to assign values to dataset with null shape") elif isinstance(val, Empty): - pass # no data + pass # no data if isinstance(val, Reference): # h5pyd References are just strings @@ -1425,8 +1423,7 @@ def __setitem__(self, args, val): val = tmp elif ( - isinstance(val, complex) - or getattr(getattr(val, "dtype", None), "kind", None) == "c" + isinstance(val, complex) or getattr(getattr(val, "dtype", None), "kind", None) == "c" ): if self.dtype.kind != "V" or self.dtype.names != ("r", "i"): raise TypeError( @@ -1441,9 +1438,9 @@ def __setitem__(self, args, val): val = tmp elif 
self.dtype.kind == "O" or ( - self.dtype.kind == "V" - and (not isinstance(val, numpy.ndarray) or val.dtype.kind != "V") - and (self.dtype.subdtype == None) + self.dtype.kind == "V" and ( + not isinstance(val, numpy.ndarray) or val.dtype.kind != "V" + ) and (self.dtype.subdtype is None) ): # TBD: Do we need something like the following in the above if condition: # (self.dtype.str != val.dtype.str) @@ -1482,7 +1479,8 @@ def __setitem__(self, args, val): shp = self.dtype.subdtype[1] # type shape valshp = val.shape[-len(shp):] if valshp != shp: # Last dimension has to match - raise TypeError("When writing to array types, last N dimensions have to match (got %s, but should be %s)" % (valshp, shp,)) + raise TypeError("When writing to array types,\ + last N dimensions have to match (got %s, but should be %s)" % (valshp, shp,)) mtype = h5t.py_create(numpy.dtype((val.dtype, shp))) mshape = val.shape[0:len(val.shape)-len(shp)] @@ -1531,7 +1529,7 @@ def __setitem__(self, args, val): return # Broadcast scalars if necessary. - if mshape == () and selection.mshape != None and selection.mshape != (): + if mshape == () and selection.mshape is not None and selection.mshape != (): self.log.debug("broadcast scalar") if self.dtype.subdtype is not None: raise TypeError("Scalar broadcasting is not supported for array dtypes") @@ -1777,7 +1775,7 @@ def write_dset_tl(self, args): Thread-local method to write to a single dataset """ dset = args[0] - idx = args[1] + # idx = args[1] write_args = args[2] write_vals = args[3] try: @@ -1787,7 +1785,7 @@ def write_dset_tl(self, args): return def __getitem__(self, args): - """ + """ Read the same slice from each of the datasets managed by this MultiManager. """ @@ -1826,7 +1824,8 @@ def __getitem__(self, args): # TODO: Handle the case where some or all datasets share an HTTPConn object with ThreadPoolExecutor(max_workers=self.max_workers) as executor: - read_futures = [executor.submit(self.read_dset_tl, (self.datasets[i], i, args)) for i in range(len(self.datasets))] + read_futures = [executor.submit(self.read_dset_tl, + (self.datasets[i], i, args)) for i in range(len(self.datasets))] ret_data = [None] * len(self.datasets) for future in as_completed(read_futures): @@ -1880,7 +1879,8 @@ def __setitem__(self, args, vals): next_port = low_port with ThreadPoolExecutor(max_workers=self.max_workers) as executor: - write_futures = [executor.submit(self.write_dset_tl, (self.datasets[i], i, args, vals[i])) for i in range(len(self.datasets))] + write_futures = [executor.submit(self.write_dset_tl, + (self.datasets[i], i, args, vals[i])) for i in range(len(self.datasets))] for future in as_completed(write_futures): try: diff --git a/h5pyd/_hl/datatype.py b/h5pyd/_hl/datatype.py index e92dae7f..82e091a3 100644 --- a/h5pyd/_hl/datatype.py +++ b/h5pyd/_hl/datatype.py @@ -14,12 +14,13 @@ import posixpath as pp -#from ..h5t import TypeID +# from ..h5t import TypeID from .base import HLObject -from .objectid import TypeID +from .objectid import TypeID from .h5type import createDataType + class Datatype(HLObject): """ @@ -47,7 +48,6 @@ def __init__(self, bind): self._dtype = createDataType(self.id.type_json) self._req_prefix = "/datatypes/" + self.id.uuid - def __repr__(self): if not self.id: return "" diff --git a/h5pyd/_hl/files.py b/h5pyd/_hl/files.py index c190258b..683f35d5 100644 --- a/h5pyd/_hl/files.py +++ b/h5pyd/_hl/files.py @@ -164,13 +164,8 @@ def __init__( dn_ids = [] # if we're passed a GroupId as domain, just initialize the file object # with that. 
This will be faster and enable the File object to share the same http connection. - if ( - mode is None - and endpoint is None - and username is None - and password is None - and isinstance(domain, GroupID) - ): + no_endpoint_info = endpoint is None and username is None and password is None + if (mode is None and no_endpoint_info and isinstance(domain, GroupID)): groupid = domain else: if mode and mode not in ("r", "r+", "w", "w-", "x", "a"): @@ -194,7 +189,7 @@ def __init__( for protocol in ("http://", "https://", "hdf5://", "http+unix://"): if domain.startswith(protocol): if protocol.startswith("http"): - domain = domain[len(protocol) :] + domain = domain[len(protocol):] # extract the endpoint n = domain.find("/") if n < 0: @@ -203,7 +198,7 @@ def __init__( domain = domain[n:] break else: # hdf5:// - domain = domain[(len(protocol) - 1) :] + domain = domain[(len(protocol) - 1):] if not domain: raise IOError(400, "no domain provided") @@ -394,8 +389,7 @@ def __init__( def _getVerboseInfo(self): now = time.time() if ( - self._verboseUpdated is None - or now - self._verboseUpdated > VERBOSE_REFRESH_TIME + self._verboseUpdated is None or now - self._verboseUpdated > VERBOSE_REFRESH_TIME ): # resynch the verbose data req = "/?verbose=1" diff --git a/h5pyd/_hl/filters.py b/h5pyd/_hl/filters.py index 6fcd3267..413bc4fd 100644 --- a/h5pyd/_hl/filters.py +++ b/h5pyd/_hl/filters.py @@ -136,10 +136,9 @@ def rq_tuple(tpl, name): ) # End argument validation + has_filter = any((shuffle, fletcher32, compression, maxshape, scaleoffset is not None)) if (chunks is True) or ( - chunks is None - and layout is not None - and any((shuffle, fletcher32, compression, maxshape, scaleoffset is not None)) + chunks is None and layout is not None and has_filter ): chunks = guess_chunk(shape, maxshape, dtype.itemsize) @@ -277,7 +276,6 @@ def rq_tuple(tpl, name): initializer.extend(initializer_opts) plist["initializer"] = initializer - return plist @@ -339,8 +337,7 @@ def guess_chunk(shape, maxshape, typesize): chunk_bytes = np.prod(chunks) * typesize if ( - chunk_bytes < target_size - or abs(chunk_bytes - target_size) / target_size < 0.5 + chunk_bytes < target_size or abs(chunk_bytes - target_size) / target_size < 0.5 ) and chunk_bytes < CHUNK_MAX: break diff --git a/h5pyd/_hl/group.py b/h5pyd/_hl/group.py index cf4a95ff..831b3775 100644 --- a/h5pyd/_hl/group.py +++ b/h5pyd/_hl/group.py @@ -26,6 +26,7 @@ from .datatype import Datatype from . import h5type + def isUUID(name): # return True if name looks like an object id # There are some additional checks we could add to reduce false positives @@ -49,7 +50,7 @@ class Group(HLObject, MutableMappingHDF5): """ def __init__(self, bind, **kwargs): - #print "group init, bind:", bind + # print "group init, bind:", bind """ Create a new Group object by binding to a low-level GroupID. 
""" @@ -73,7 +74,7 @@ def _get_link_json(self, h5path): in_group = False # may belong to some other group if h5path[0] == '/': - #abs path, start with root + # abs path, start with root # get root_uuid parent_uuid = self.id.http_conn.root_uuid # make a fake tgt_json to represent 'link' to root group @@ -118,8 +119,8 @@ def _get_link_json(self, h5path): # use server side look ups for non-hardlink paths group_uuid = None self.log.debug("objdb search: non-hardlink") - #tgt_json = None - #break + # tgt_json = None + # break else: group_uuid = tgt_json["id"] @@ -180,7 +181,6 @@ def _get_objdb_links(self): group_json = objdb[self.id.id] return group_json["links"] - def create_group(self, h5path): """ Create and return a new subgroup. @@ -191,12 +191,11 @@ def create_group(self, h5path): if isinstance(h5path, bytes): h5path = h5path.decode('utf-8') - #if self.__contains__(name): + # if self.__contains__(name): # raise ValueError("Unable to create link (Name alredy exists)") if h5path[-1] == '/': raise ValueError("Invalid path for create_group") - if h5path[0] == '/': # absolute path parent_uuid = self.file.id.id # uuid of root group @@ -207,7 +206,6 @@ def create_group(self, h5path): self.log.info("create_group: {}".format(h5path)) - links = h5path.split('/') sub_group = None # the object we'll return for link in links: @@ -225,7 +223,7 @@ def create_group(self, h5path): if create_group: link_json = {'id': parent_uuid, 'name': link} - body = {'link': link_json } + body = {'link': link_json} self.log.debug("create group with body: {}".format(body)) rsp = self.POST('/groups', body=body) @@ -257,13 +255,11 @@ def create_group(self, h5path): parent_name = parent_name + '/' + link_json["title"] self.log.debug("create group - parent name: {}".format(parent_name)) - if sub_group is None: # didn't actually create anything raise ValueError("name already exists") return sub_group - def create_dataset(self, name, shape=None, dtype=None, data=None, **kwds): """ Create a new HDF5 dataset @@ -318,17 +314,16 @@ def create_dataset(self, name, shape=None, dtype=None, data=None, **kwds): initializer (String) chunk initializer function initializer_args - (List) arguments to be passed to initializer + (List) arguments to be passed to initializer """ - if self.id.http_conn.mode == 'r': raise ValueError("Unable to create dataset (No write intent on file)") if isinstance(name, bytes): # convert byte input to string name = name.decode("utf-8") - + """ group = self if name: @@ -354,7 +349,7 @@ def create_dataset(self, name, shape=None, dtype=None, data=None, **kwds): if isinstance(data, Empty): if dtype is None: dtype = data.dtype - else: + else: if dtype is None: dtype = guess_dtype(data) if not isinstance(data, numpy.ndarray) or dtype != data.dtype: @@ -374,7 +369,7 @@ def create_dataset(self, name, shape=None, dtype=None, data=None, **kwds): shape = data.shape else: shape = tuple(shape) - + if data is not None and not isinstance(data, Empty) and (numpy.product(shape) != numpy.product(data.shape)): raise ValueError("Shape tuple is incompatible with data") """ @@ -503,7 +498,6 @@ def create_table(self, name, numrows=None, dtype=None, data=None, **kwds): obj = table.Table(dset.id) return obj - def require_dataset(self, name, shape, dtype, exact=False, **kwds): """ Open a dataset, creating it if it doesn't exist. @@ -518,7 +512,7 @@ def require_dataset(self, name, shape, dtype, exact=False, **kwds): shape or dtype don't match according to the above rules. 
""" - if not name in self: + if name not in self: return self.create_dataset(name, *(shape, dtype), **kwds) if isinstance(shape, int): @@ -546,14 +540,13 @@ def require_group(self, name): isn't a group. """ - if not name in self: + if name not in self: return self.create_group(name) grp = self[name] if not isinstance(grp, Group): raise TypeError("Incompatible object (%s) already exists" % grp.__class__.__name__) return grp - def getObjByUuid(self, uuid, collection_type=None): """ Utility method to get an obj based on collection type and uuid """ self.log.debug(f"getObjByUuid({uuid})") @@ -608,7 +601,6 @@ def getObjByUuid(self, uuid, collection_type=None): return tgt - def __getitem__(self, name): """ Open an object in the file """ # convert bytes to str for PY3 @@ -659,7 +651,7 @@ def __getitem__(self, name): endpoint = self.id.http_conn.endpoint username = self.id.http_conn.username password = self.id.http_conn.password - f = File(external_domain, endpoint=endpoint, username=username, password=password, mode='r') + f = File(external_domain, endpoint=endpoint, username=username, password=password, mode='r') except IOError: # unable to find external link raise KeyError("Unable to open file: " + link_json['h5domain']) @@ -714,7 +706,7 @@ def get(self, name, default=None, getclass=False, getlink=False): except KeyError: return default - if not name in self: + if name not in self: return default elif getclass and not getlink: @@ -790,23 +782,23 @@ def __setitem__(self, name, obj): tgt[basename] = obj elif isinstance(obj, HLObject): - body = {'id': obj.id.uuid } + body = {'id': obj.id.uuid} req = "/groups/" + self.id.uuid + "/links/" + name self.PUT(req, body=body) elif isinstance(obj, SoftLink): - body = {'h5path': obj.path } + body = {'h5path': obj.path} req = "/groups/" + self.id.uuid + "/links/" + name self.PUT(req, body=body) - #self.id.links.create_soft(name, self._e(obj.path), + # self.id.links.create_soft(name, self._e(obj.path), # lcpl=lcpl, lapl=self._lapl) elif isinstance(obj, ExternalLink): body = {'h5path': obj.path, - 'h5domain': obj.filename } + 'h5domain': obj.filename} req = "/groups/" + self.id.uuid + "/links/" + name self.PUT(req, body=body) - #self.id.links.create_external(name, self._e(obj.filename), + # self.id.links.create_external(name, self._e(obj.filename), # self._e(obj.path), lcpl=lcpl, lapl=self._lapl) elif isinstance(obj, numpy.dtype): @@ -815,18 +807,18 @@ def __setitem__(self, name, obj): type_json = h5type.getTypeItem(obj) req = "/datatypes" - body = {'type': type_json } + body = {'type': type_json} rsp = self.POST(req, body=body) body['id'] = rsp['id'] body['lastModified'] = rsp['lastModified'] type_id = TypeID(self, body) req = "/groups/" + self.id.uuid + "/links/" + name - body = {'id': type_id.uuid } + body = {'id': type_id.uuid} self.PUT(req, body=body) - #htype = h5t.py_create(obj) - #htype.commit(self.id, name, lcpl=lcpl) + # htype = h5t.py_create(obj) + # htype.commit(self.id, name, lcpl=lcpl) else: if isinstance(obj, numpy.ndarray): @@ -838,9 +830,9 @@ def __setitem__(self, name, obj): dt = guess_dtype(obj) arr = numpy.array(obj, dtype=dt) self.create_dataset(name, shape=arr.shape, dtype=arr.dtype, data=arr[...]) - - #ds = self.create_dataset(None, data=obj, dtype=base.guess_dtype(obj)) - #h5o.link(ds.id, self.id, name, lcpl=lcpl) + + # ds = self.create_dataset(None, data=obj, dtype=base.guess_dtype(obj)) + # h5o.link(ds.id, self.id, name, lcpl=lcpl) def __delitem__(self, name): """ Delete (unlink) an item from this group. 
""" @@ -858,7 +850,7 @@ def __delitem__(self, name): raise TypeError(f"unexpected type for object id: {tgt.id}") else: raise IOError("Not found") - + else: # delete the link, not an object req = "/groups/" + self.id.uuid + "/links/" + name @@ -866,7 +858,6 @@ def __delitem__(self, name): if name.find('/') == -1 and name in self._link_db: # remove from link cache del self._link_db[name] - def __len__(self): """ Number of members attached to this group """ @@ -900,7 +891,6 @@ def __iter__(self): for name in links: yield name - def __contains__(self, name): """ Test if a member name exists """ found = False @@ -911,7 +901,6 @@ def __contains__(self, name): pass # not found return found - def copy(self, source, dest, name=None, shallow=False, expand_soft=False, expand_external=False, expand_refs=False, without_attrs=False): @@ -1108,12 +1097,12 @@ def visititems(self, func): for link in links: obj = None if link['class'] == 'H5L_TYPE_SOFT': - #obj = SoftLink(link['h5path']) + # obj = SoftLink(link['h5path']) pass # don't visit soft links' elif link['class'] == 'H5L_TYPE_EXTERNAL': - #obj = ExternalLink(link['h5domain'], link['h5path']) - pass # don't visit external links' - elif link['class'] == 'H5L_TYPE_UDLINK': + # obj = ExternalLink(link['h5domain'], link['h5path']) + pass # don't visit external links' + elif link['class'] == 'H5L_TYPE_UDLINK': obj = UserDefinedLink() elif link['class'] == 'H5L_TYPE_HARD': if link['id'] in visited: @@ -1159,7 +1148,7 @@ class HardLink(object): pass -#TODO: implement equality testing for these +# TODO: implement equality testing for these class SoftLink(object): """ @@ -1201,6 +1190,7 @@ def __init__(self, filename, path): def __repr__(self): return '' % (self.path, self.filename) + class UserDefinedLink(object): """ diff --git a/h5pyd/_hl/h5type.py b/h5pyd/_hl/h5type.py index 60127f20..81b0ed42 100644 --- a/h5pyd/_hl/h5type.py +++ b/h5pyd/_hl/h5type.py @@ -14,8 +14,9 @@ import numpy as np -# trying to import these results in circular references, so just use is_reference, is_regionreference helpers to identify -#from .base import Reference, RegionReference +# trying to import these results in circular references, +# so just use is_reference, is_regionreference helpers to identify +# from .base import Reference, RegionReference import weakref import codecs from collections import namedtuple @@ -23,29 +24,30 @@ def is_reference(val): try: - if val.__class__.__name__ == "Reference": + if val.__class__.__name__ == "Reference": return True except AttributeError: - pass # ignore + pass # ignore try: if val.__name__ == "Reference": return True except AttributeError: - pass # ignore + pass # ignore return False + def is_regionreference(val): try: - if val.__class__.__name__ == "RegionReference": + if val.__class__.__name__ == "RegionReference": return True except AttributeError: - pass # ignore + pass # ignore try: if val.__name__ == "RegionReference": return True except AttributeError: - pass # ignore + pass # ignore return False @@ -79,7 +81,7 @@ def __repr__(self): if self._id.objtype_code == 'd': item = "datasets/" + self._id.id elif self._id.objtype_code == 'g': - item = "groups/" + self._id.id + item = "groups/" + self._id.id elif self._id.objtype_code == 't': item = "datatypes/" + self._id.id else: @@ -89,6 +91,7 @@ def __repr__(self): def tolist(self): return [self.__repr__(),] + class RegionReference(): """ @@ -113,6 +116,7 @@ def __init__(self, bind): def __repr__(self): return "" + def special_dtype(**kwds): """ Create a new h5py "special" type. 
Only one keyword may be given. @@ -167,6 +171,7 @@ def special_dtype(**kwds): raise TypeError('Unknown special type "%s"' % name) + def check_vlen_dtype(dt): """If the dtype represents an HDF5 vlen, returns the Python base class. @@ -177,8 +182,10 @@ def check_vlen_dtype(dt): except AttributeError: return None + string_info = namedtuple('string_info', ['encoding', 'length']) + def check_string_dtype(dt): """If the dtype represents an HDF5 string, returns a string_info object. @@ -199,6 +206,7 @@ def check_string_dtype(dt): else: return None + def check_enum_dtype(dt): """If the dtype represents an HDF5 enumerated type, returns the dictionary mapping string names to integer values. @@ -210,6 +218,7 @@ def check_enum_dtype(dt): except AttributeError: return None + def check_opaque_dtype(dt): """Return True if the dtype given is tagged to be stored as HDF5 opaque data """ @@ -218,6 +227,7 @@ def check_opaque_dtype(dt): except AttributeError: return False + def check_ref_dtype(dt): """If the dtype represents an HDF5 reference type, returns the reference class (either Reference or RegionReference). @@ -273,6 +283,7 @@ def vlen_dtype(basetype): """ return np.dtype('O', metadata={'vlen': basetype}) + def string_dtype(encoding='utf-8', length=None): """Make a numpy dtype for HDF5 strings @@ -307,6 +318,7 @@ def string_dtype(encoding='utf-8', length=None): else: raise TypeError("length must be integer or None (got %r)" % length) + def enum_dtype(values_dict, basetype=np.uint8): """Create a NumPy representation of an HDF5 enumerated type @@ -320,19 +332,18 @@ def enum_dtype(values_dict, basetype=np.uint8): return np.dtype(dt, metadata={'enum': values_dict}) -""" -Convert the given type item to a predefined type string for -predefined integer and floating point types ("H5T_STD_I64LE", et. al). -For compound types, recursively iterate through the typeItem and do same -conversion for fields of the compound type. """ - def getTypeResponse(typeItem): - + """ + Convert the given type item to a predefined type string for + predefined integer and floating point types ("H5T_STD_I64LE", et. al). + For compound types, recursively iterate through the typeItem and do same + conversion for fields of the compound type. 
+ """ response = None if 'uuid' in typeItem: # committed type, just return uuid response = 'datatypes/' + typeItem['uuid'] - elif typeItem['class'] == 'H5T_INTEGER' or typeItem['class'] == 'H5T_FLOAT': + elif typeItem['class'] == 'H5T_INTEGER' or typeItem['class'] == 'H5T_FLOAT': # just return the class and base for pre-defined types response = {} response['class'] = typeItem['class'] @@ -350,7 +361,7 @@ def getTypeResponse(typeItem): response['class'] = 'H5T_COMPOUND' fieldList = [] for field in typeItem['fields']: - fieldItem = { } + fieldItem = {} fieldItem['name'] = field['name'] fieldItem['type'] = getTypeResponse(field['type']) # recursive call fieldList.append(fieldItem) @@ -376,14 +387,14 @@ def getTypeItem(dt): """ predefined_int_types = { - 'int8': 'H5T_STD_I8', - 'uint8': 'H5T_STD_U8', - 'int16': 'H5T_STD_I16', - 'uint16': 'H5T_STD_U16', - 'int32': 'H5T_STD_I32', - 'uint32': 'H5T_STD_U32', - 'int64': 'H5T_STD_I64', - 'uint64': 'H5T_STD_U64' + 'int8': 'H5T_STD_I8', + 'uint8': 'H5T_STD_U8', + 'int16': 'H5T_STD_I16', + 'uint16': 'H5T_STD_U16', + 'int32': 'H5T_STD_I32', + 'uint32': 'H5T_STD_U32', + 'int64': 'H5T_STD_I64', + 'uint64': 'H5T_STD_U64' } predefined_float_types = { 'float16': 'H5T_IEEE_F16', @@ -419,7 +430,7 @@ def getTypeItem(dt): vlen_check = np.dtype(vlen_check) if vlen_check is None: vlen_check = str # default to bytes - ref_check = check_dtype(ref=dt.base) + ref_check = check_dtype(ref=dt.base) if vlen_check == bytes: type_info['class'] = 'H5T_STRING' type_info['length'] = 'H5T_VARIABLE' @@ -444,7 +455,7 @@ def getTypeItem(dt): type_info['size'] = 'H5T_VARIABLE' type_info['base'] = getTypeItem(vlen_check) elif vlen_check is not None: - #unknown vlen type + # unknown vlen type raise TypeError("Unknown h5py vlen type: " + str(vlen_check)) elif ref_check is not None: # a reference type @@ -495,13 +506,13 @@ def getTypeItem(dt): if dt.base.byteorder == '>': byteorder = 'BE' # this mapping is an h5py convention for boolean support - mapping = { + mapping = { "FALSE": 0, "TRUE": 1 } type_info['class'] = 'H5T_ENUM' type_info['mapping'] = mapping - base_info = { "class": "H5T_INTEGER" } + base_info = {"class": "H5T_INTEGER"} base_info['base'] = "H5T_STD_I8" + byteorder type_info["base"] = base_info elif dt.kind == 'f': @@ -533,8 +544,8 @@ def getTypeItem(dt): type_info['mapping'] = mapping if dt.name not in predefined_int_types: raise TypeError("Unexpected integer type: " + dt.name) - #maps to one of the HDF5 predefined types - base_info = { "class": "H5T_INTEGER" } + # maps to one of the HDF5 predefined types + base_info = {"class": "H5T_INTEGER"} base_info['base'] = predefined_int_types[dt.name] + byteorder type_info["base"] = base_info else: @@ -570,12 +581,13 @@ def getTypeItem(dt): return type_info -""" + +def getItemSize(typeItem): + """ Get size of an item in bytes. For variable length types (e.g. 
variable length strings), return the string "H5T_VARIABLE" -""" -def getItemSize(typeItem): + """ # handle the case where we are passed a primitive type first if isinstance(typeItem, str) or isinstance(typeItem, bytes): for type_prefix in ("H5T_STD_I", "H5T_STD_U", "H5T_IEEE_F"): @@ -647,7 +659,7 @@ def getItemSize(typeItem): raise TypeError("Expected dictionary type for field") if 'type' not in field: raise KeyError("'type' missing from field") - subtype_size = getItemSize(field['type']) # recursive call + subtype_size = getItemSize(field['type']) # recursive call if subtype_size == "H5T_VARIABLE": item_size = "H5T_VARIABLE" break # don't need to look at the rest @@ -667,19 +679,19 @@ def getItemSize(typeItem): def getNumpyTypename(hdf5TypeName, typeClass=None): predefined_int_types = { - 'H5T_STD_I8': 'i1', - 'H5T_STD_U8': 'u1', - 'H5T_STD_I16': 'i2', - 'H5T_STD_U16': 'u2', - 'H5T_STD_I32': 'i4', - 'H5T_STD_U32': 'u4', - 'H5T_STD_I64': 'i8', - 'H5T_STD_U64': 'u8' + 'H5T_STD_I8': 'i1', + 'H5T_STD_U8': 'u1', + 'H5T_STD_I16': 'i2', + 'H5T_STD_U16': 'u2', + 'H5T_STD_I32': 'i4', + 'H5T_STD_U32': 'u4', + 'H5T_STD_I64': 'i8', + 'H5T_STD_U64': 'u8' } predefined_float_types = { - 'H5T_IEEE_F16': 'f2', - 'H5T_IEEE_F32': 'f4', - 'H5T_IEEE_F64': 'f8' + 'H5T_IEEE_F16': 'f2', + 'H5T_IEEE_F32': 'f4', + 'H5T_IEEE_F64': 'f8' } if len(hdf5TypeName) < 3: @@ -692,11 +704,9 @@ def getNumpyTypename(hdf5TypeName, typeClass=None): key = hdf5TypeName[:-2] endian = '>' - if key in predefined_int_types and (typeClass == None or - typeClass == 'H5T_INTEGER'): + if key in predefined_int_types and (typeClass is None or typeClass == 'H5T_INTEGER'): return endian + predefined_int_types[key] - if key in predefined_float_types and (typeClass == None or - typeClass == 'H5T_FLOAT'): + if key in predefined_float_types and (typeClass is None or typeClass == 'H5T_FLOAT'): return endian + predefined_float_types[key] raise TypeError("Type Error: invalid type") @@ -704,7 +714,7 @@ def getNumpyTypename(hdf5TypeName, typeClass=None): def createBaseDataType(typeItem): dtRet = None - if type(typeItem) == str or type(typeItem) == str: + if isinstance(typeItem, str) or isinstance(typeItem, str): # should be one of the predefined types dtName = getNumpyTypename(typeItem) dtRet = np.dtype(dtName) @@ -798,18 +808,18 @@ def createBaseDataType(typeItem): metadata = None if baseType.metadata: metadata = dict(baseType.metadata) - dtRet = np.dtype(dims+baseType.str, metadata=metadata) + dtRet = np.dtype(dims + baseType.str, metadata=metadata) else: - dtRet = np.dtype(dims+baseType.str) + dtRet = np.dtype(dims + baseType.str) return dtRet # return predefined type elif typeClass == 'H5T_REFERENCE': if 'base' not in typeItem: raise KeyError("'base' not provided") if typeItem['base'] == 'H5T_STD_REF_OBJ': - dtRet = special_dtype(ref=Reference) + dtRet = special_dtype(ref=Reference) elif typeItem['base'] == 'H5T_STD_REF_DSETREG': - dtRet = special_dtype(ref=RegionReference) + dtRet = special_dtype(ref=RegionReference) else: raise TypeError("Invalid base type for reference type") elif typeClass == 'H5T_ENUM': @@ -826,7 +836,7 @@ def createBaseDataType(typeItem): if len(mapping) == 0: raise KeyError("empty enum map") dt = createBaseDataType(base_json) - if dt.kind == 'i' and dt.name=='int8' and len(mapping) == 2 and 'TRUE' in mapping and 'FALSE' in mapping: + if dt.kind == 'i' and dt.name == 'int8' and len(mapping) == 2 and 'TRUE' in mapping and 'FALSE' in mapping: # convert to numpy boolean type dtRet = np.dtype("bool") else: @@ -836,13 +846,13 @@ 
def createBaseDataType(typeItem): else: raise TypeError("Invalid type class") - return dtRet -""" -Create a numpy datatype given a json type -""" + def createDataType(typeItem): + """ + Create a numpy datatype given a json type + """ dtRet = None if type(typeItem) in [str, bytes]: # should be one of the predefined types @@ -850,10 +860,9 @@ def createDataType(typeItem): dtRet = np.dtype(dtName) return dtRet # return predefined type - if type(typeItem) != dict: + if type(typeItem) is not dict: raise TypeError("invalid type") - if 'class' not in typeItem: raise KeyError("'class' not provided") typeClass = typeItem['class'] @@ -868,7 +877,7 @@ def createDataType(typeItem): raise KeyError("no 'field' elements provided") subtypes = [] for field in fields: - if type(field) != dict: + if type(field) is not dict: raise TypeError("Expected dictionary type for field") if 'name' not in field: raise KeyError("'name' missing from field") @@ -892,10 +901,11 @@ def createDataType(typeItem): dtRet = createBaseDataType(typeItem) # create non-compound dt return dtRet -""" -Return dtype with field added for Index values -""" + def getQueryDtype(dt): + """ + Return dtype with field added for Index values + """ field_names = dt.names # make up a index field name that doesn't conflict with existing names index_name = "index" @@ -909,5 +919,5 @@ def getQueryDtype(dt): for i in range(len(dt)): dt_fields.append((dt.names[i], dt[i])) query_dt = np.dtype(dt_fields) - + return query_dt diff --git a/h5pyd/_hl/h5type_test.py b/h5pyd/_hl/h5type_test.py index ca433131..363085f1 100755 --- a/h5pyd/_hl/h5type_test.py +++ b/h5pyd/_hl/h5type_test.py @@ -30,17 +30,16 @@ def testBaseIntegerTypeItem(self): typeItem = h5type.getTypeItem(dt) self.assertEqual(typeItem['class'], 'H5T_INTEGER') self.assertEqual(typeItem['base'], 'H5T_STD_I8LE') - typeItem = h5type.getTypeResponse(typeItem) # non-verbose format + typeItem = h5type.getTypeResponse(typeItem) # non-verbose format self.assertEqual(typeItem['class'], 'H5T_INTEGER') self.assertEqual(typeItem['base'], 'H5T_STD_I8LE') - def testBaseFloatTypeItem(self): dt = np.dtype(' 0 and bracket_end > 0: try: - dn_count = int(endpoint[bracket_start + 1 : bracket_end]) + dn_count = int(endpoint[bracket_start + 1: bracket_end]) except ValueError: # if value is '*' or something just drop down to default # setup based on cpu count @@ -379,7 +379,7 @@ def serverInfo(self): if self._endpoint is None: raise IOError("object not initialized") - + # make an about request rsp = self.GET("/about") if rsp.status_code != 200: @@ -440,14 +440,11 @@ def GET(self, req, format="json", params=None, headers=None, use_cache=True): if format == "binary": headers["accept"] = "application/octet-stream" - if ( - self._cache is not None - and use_cache - and format == "json" - and params["domain"] == self._domain - and "select" not in params - and "query" not in params - ): + check_cache = self._cache is not None and use_cache and format == "json" + check_cache = check_cache and params["domain"] == self._domain + check_cache = check_cache and "select" not in params and "query" not in params + + if check_cache: self.log.debug("httpcon - checking cache") if req in self._cache: self.log.debug("httpcon - returning cache result") @@ -504,12 +501,10 @@ def GET(self, req, format="json", params=None, headers=None, use_cache=True): content_type = rsp_headers["Content-Type"] self.log.debug(f"content_type: {content_type}") - if ( - content_type - and content_type.startswith("application/json") - and content_length < 
MAX_CACHE_ITEM_SIZE - and not req.endswith("/value") - ): + add_to_cache = content_type and content_type.startswith("application/json") + add_to_cache = add_to_cache and content_length < MAX_CACHE_ITEM_SIZE and not req.endswith("/value") + + if add_to_cache: # add to our _cache cache_rsp = CacheResponse(rsp) self.log.debug(f"adding {req} to cache") @@ -523,11 +518,10 @@ def GET(self, req, format="json", params=None, headers=None, use_cache=True): # indicates the Lambda request was successful, but not necessarily # the requested HSDS action was. # Check here and raise IOError is needed. - if ( - rsp.status_code == 200 - and content_type - and content_type.startswith("application/json") - ): + + json_success = (rsp.status_code == 200) and content_type and content_type.startswith("application/json") + + if json_success: body = json.loads(rsp.text) if "statusCode" in body: status_code = body["statusCode"] @@ -771,7 +765,6 @@ def close(self): if self._hsds: self._hsds.stop() self._hsds = None - @property def domain(self): @@ -799,7 +792,6 @@ def cache_on(self): return False else: return True - @property def domain_json(self): diff --git a/h5pyd/_hl/objectid.py b/h5pyd/_hl/objectid.py index 41b54ce3..0a7baf1c 100644 --- a/h5pyd/_hl/objectid.py +++ b/h5pyd/_hl/objectid.py @@ -16,6 +16,7 @@ import time from .h5type import createDataType + def parse_lastmodified(datestr): """Turn last modified datetime string into a datetime object.""" if isinstance(datestr, str): @@ -133,7 +134,7 @@ def __bool__(self): def __del__(self): """ cleanup """ self.close() - + __nonzero__ = __bool__ # Python 2.7 compat @@ -203,11 +204,10 @@ def chunks(self): chunks = None layout = self.layout - if layout and layout['class'] in ('H5D_CHUNKED', 'H5D_CHUNKED_REF', 'H5D_CHUNKED_REF_INDIRECT'): if "dims" in layout: chunks = layout['dims'] - + return chunks def __init__(self, parent, item, **kwds): diff --git a/h5pyd/_hl/openid.py b/h5pyd/_hl/openid.py index 47d7e85d..b17c4cec 100644 --- a/h5pyd/_hl/openid.py +++ b/h5pyd/_hl/openid.py @@ -6,9 +6,11 @@ from abc import ABC, abstractmethod from datetime import datetime + def eprint(*args, **kwargs): print(*args, file=sys.stderr, **kwargs) + # Azure try: import adal @@ -28,6 +30,7 @@ def eprint(*args, **kwargs): from .config import Config + class OpenIDHandler(ABC): def __init__(self, endpoint, use_token_cache=True, username=None, password=None): @@ -158,7 +161,7 @@ def __init__(self, endpoint, config=None): def write_token_cache(self): if 'AD_CLIENT_SECRET' in self.config and self.config['AD_CLIENT_SECRET']: - pass # don't use token cache for unattended authentication + pass # don't use token cache for unattended authentication else: super().write_token_cache() @@ -239,7 +242,7 @@ def refresh(self): expire_dt = datetime.strptime(mgmt_token['expiresOn'], '%Y-%m-%d %H:%M:%S.%f') self._token['expiresOn'] = expire_dt.timestamp() - except: + except Exception: self._token = None @@ -251,7 +254,7 @@ def __init__(self, endpoint, config=None, scopes=None): if "google.oauth2" not in sys.modules: msg = "google.oauth2 module not found, run: python setup.py install -e '.[google]'" raise ModuleNotFoundError(msg) - + # Configuration manager hs_config = Config() @@ -332,7 +335,7 @@ def refresh(self): creds.refresh(GoogleRequest()) self._token = self._parse(creds) - except: + except Exception: self._token = None @@ -377,7 +380,7 @@ def _getKeycloakUrl(self): raise KeyError("keycloak client_id not set") url = self.config['keycloak_uri'] - url += "/auth/realms/" + url += "/auth/realms/" url += 
self.config['keycloak_realm'] url += "/protocol/openid-connect/token" @@ -401,7 +404,7 @@ def _parse(self, creds): if "expires_in" in creds: now = time.time() token['expiresOn'] = now + creds["expires_in"] - + # TBD: client_secret # TBD: scopes # TBD: client_id @@ -411,7 +414,7 @@ def _parse(self, creds): def acquire(self): """Acquire a new Keycloak token.""" keycloak_url = self._getKeycloakUrl() - + headers = {"Content-Type": "application/x-www-form-urlencoded"} body = {} body["username"] = self._username @@ -419,7 +422,7 @@ def acquire(self): body["grant_type"] = "password" body["client_id"] = self.config.get("keycloak_client_id") rsp = requests.post(keycloak_url, data=body, headers=headers) - + if rsp.status_code not in (200, 201): print("POST error: {}".format(rsp.status_code)) raise IOError("Keycloak response: {}".format(rsp.status_code)) @@ -429,7 +432,6 @@ def acquire(self): def refresh(self): """Try to renew a token.""" - # TBD + # TBD # unclear if refresh is supported without a client secret self._token = None - diff --git a/h5pyd/_hl/requests_lambda.py b/h5pyd/_hl/requests_lambda.py index 9ead726a..01a99880 100644 --- a/h5pyd/_hl/requests_lambda.py +++ b/h5pyd/_hl/requests_lambda.py @@ -187,7 +187,7 @@ def _invoke(self, req, method="GET", params=None, headers=None, data=None): # function_name = FUNC_NAME # req_path = REQ # params = {PARAMS} - s = req[len(LAMBDA_REQ_PREFIX) :] # strip off protocol + s = req[len(LAMBDA_REQ_PREFIX):] # strip off protocol index = s.find("/") if index <= 0: msg = "Unexpected request" @@ -197,7 +197,7 @@ def _invoke(self, req, method="GET", params=None, headers=None, data=None): msg = f"unexpected lambda function name: {function_name}" raise ValueError(msg) index = s.find(function_name) - req_path = s[index + len(function_name) :] + req_path = s[index + len(function_name):] if not req_path: msg = "no request path found" raise ValueError(msg) diff --git a/h5pyd/_hl/selections.py b/h5pyd/_hl/selections.py index 0b14dd25..26cacd7e 100644 --- a/h5pyd/_hl/selections.py +++ b/h5pyd/_hl/selections.py @@ -71,7 +71,7 @@ def select(obj, args): # "Special" indexing objects if len(args) == 1: - + arg = args[0] if isinstance(arg, Selection): @@ -185,7 +185,6 @@ def getSelectNpoints(self): raise IOError("Unsupported select type") return npoints - def broadcast(self, target_shape): """ Get an iterable for broadcasting """ if np.product(target_shape) != self.nselect: @@ -198,6 +197,7 @@ def __getitem__(self, args): def __repr__(self): return f"Selection(shape:{self._shape})" + class PointSelection(Selection): """ @@ -205,7 +205,7 @@ class PointSelection(Selection): points to the three methods append(), prepend() and set(), or a single boolean array to __getitem__. """ - def __init__(self, shape, *args, **kwds): + def __init__(self, shape, *args, **kwds): """ Create a Point selection. 
""" Selection.__init__(self, shape, *args, **kwds) self._points = [] @@ -215,7 +215,6 @@ def points(self): """ selection points """ return self._points - def getSelectNpoints(self): npoints = None if self._select_type == H5S_SELECT_NONE: @@ -236,16 +235,14 @@ def getSelectNpoints(self): raise IOError("Unsupported select type") return npoints - def _perform_selection(self, points, op): """ Internal method which actually performs the selection """ if isinstance(points, np.ndarray) or True: points = np.asarray(points, order='C', dtype='u8') if len(points.shape) == 1: - #points.shape = (1,points.shape[0]) + # points.shape = (1,points.shape[0]) pass - if self._select_type != H5S_SEL_POINTS: op = H5S_SELECT_SET self._select_type = H5S_SEL_POINTS @@ -261,12 +258,11 @@ def _perform_selection(self, points, op): else: raise ValueError("Unsupported operation") - #def _perform_list_selection(points, H5S_SELECT_SET): - + # def _perform_list_selection(points, H5S_SELECT_SET): def __getitem__(self, arg): """ Perform point-wise selection from a NumPy boolean array """ - if isinstance(arg, list): + if isinstance(arg, list): points = arg else: if not (isinstance(arg, np.ndarray) and arg.dtype.kind == 'b'): @@ -328,7 +324,7 @@ def step(self): def __init__(self, shape, *args, **kwds): Selection.__init__(self, shape, *args, **kwds) rank = len(self._shape) - self._sel = ((0,)*rank, self._shape, (1,)*rank, (False,)*rank) + self._sel = ((0,) * rank, self._shape, (1,) * rank, (False,) * rank) self._mshape = self._shape self._select_type = H5S_SELECT_ALL @@ -342,11 +338,11 @@ def __getitem__(self, args): raise TypeError("Invalid index for scalar dataset (only ..., () allowed)") self._select_type = H5S_SELECT_ALL return self - - start, count, step, scalar = _handle_simple(self._shape,args) + + start, count, step, scalar = _handle_simple(self._shape, args) self._sel = (start, count, step, scalar) - #self._id.select_hyperslab(start, count, step) + # self._id.select_hyperslab(start, count, step) self._select_type = H5S_SELECT_HYPERSLABS self._mshape = tuple(x for x, y in zip(count, scalar) if not y) @@ -390,7 +386,7 @@ def getQueryParam(self): dim_sel = str(start) + ':' + str(stop) if self.step[i] != 1: dim_sel += ':' + str(self.step[i]) - if i != rank-1: + if i != rank - 1: dim_sel += ',' param += dim_sel param += ']' @@ -415,7 +411,7 @@ def broadcast(self, target_shape): target = list(target_shape) tshape = [] - for idx in range(1,rank+1): + for idx in range(1, rank + 1): if len(target) == 0 or scalar[-idx]: # Skip scalar axes tshape.append(1) else: @@ -427,18 +423,19 @@ def broadcast(self, target_shape): tshape.reverse() tshape = tuple(tshape) - chunks = tuple(x//y for x, y in zip(count, tshape)) + chunks = tuple(x // y for x, y in zip(count, tshape)) nchunks = int(np.product(chunks)) if nchunks == 1: yield self._id else: sid = self._id.copy() - sid.select_hyperslab((0,)*rank, tshape, step) + sid.select_hyperslab((0,) * rank, tshape, step) for idx in range(nchunks): - offset = tuple(x*y*z + s for x, y, z, s in zip(np.unravel_index(idx, chunks), tshape, step, start)) + offset = tuple(x * y * z + s for x, y, z, s in zip(np.unravel_index(idx, chunks), tshape, step, start)) sid.offset_simple(offset) yield sid + def __repr__(self): s = f"SimpleSelection(shape:{self._shape}, start: {self._sel[0]}," s += f" count: {self._sel[1]}, step: {self._sel[2]}" @@ -466,7 +463,6 @@ def mshape(self): """ Shape of current selection """ return self._mshape - def __init__(self, shape, *args, **kwds): Selection.__init__(self, shape, 
*args, **kwds) # self._mshape = self._shape @@ -499,9 +495,9 @@ def __getitem__(self, args): step = 1 else: step = arg.step - slices.append(slice(start,stop, step)) + slices.append(slice(start, stop, step)) mshape.append(count) - + elif hasattr(arg, 'dtype') and arg.dtype == np.dtype('bool'): if len(arg.shape) != 1: raise TypeError("Boolean indexing arrays must be 1-D") @@ -519,11 +515,11 @@ def __getitem__(self, args): # coordinate selection prev = None for x in arg: - #if not isinstance(x, int): + # if not isinstance(x, int): # raise TypeError(f'Illegal coordinate index "{arg}" must be a list of integers') - + if x < 0 or x >= length: - raise IndexError(f"Index ({arg}) out of range (0-{length-1})") + raise IndexError(f"Index ({arg}) out of range (0-{length - 1})") if prev is not None and x <= prev: raise TypeError("Indexing elements must be in increasing order") prev = x @@ -532,17 +528,16 @@ def __getitem__(self, args): select_type = H5S_SELLECT_FANCY elif isinstance(arg, int): if arg < 0 or arg >= length: - raise IndexError(f"Index ({arg}) out of range (0-{length-1})") + raise IndexError(f"Index ({arg}) out of range (0-{length - 1})") slices.append(arg) elif isinstance(arg, type(Ellipsis)): - slices.append(slice(0,length,1)) + slices.append(slice(0, length, 1)) else: raise TypeError(f"Unexpected arg type: {arg} - {type(arg)}") self._slices = slices self._select_type = select_type self._mshape = tuple(mshape) - def getSelectNpoints(self): """Return number of elements in current selection """ @@ -557,7 +552,7 @@ def getSelectNpoints(self): # scalar selection count = 1 npoints *= count - + return npoints def getQueryParam(self): @@ -579,17 +574,16 @@ def getQueryParam(self): query.append('[') for idx, n in enumerate(s): query.append(str(n)) - if idx+1 < len(s): + if idx + 1 < len(s): query.append(',') query.append(']') else: # scalar selection query.append(str(s)) - if dim+1 < rank: + if dim + 1 < rank: query.append(',') query.append(']') return "".join(query) - def broadcast(self, target_shape): raise TypeError("Broadcasting is not supported for complex selections") @@ -597,6 +591,7 @@ def broadcast(self, target_shape): def __repr__(self): return f"FancySelection(shape:{self._shape}, slices: {self._slices})" + def _expand_ellipsis(args, rank): """ Expand ellipsis objects and fill in missing axes. """ @@ -611,7 +606,7 @@ def _expand_ellipsis(args, rank): for arg in args: if arg is Ellipsis: - final_args.extend( (slice(None,None,None),)*(rank-n_args+1) ) + final_args.extend((slice(None, None, None),) * (rank - n_args + 1)) else: final_args.append(arg) @@ -620,6 +615,7 @@ def _expand_ellipsis(args, rank): return final_args + def _handle_simple(shape, args): """ Process a "simple" selection tuple, containing only slices and integer objects. 
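For reference, the FancySelection rules enforced above (at most one index list per selection, 1-D boolean arrays only, coordinate lists strictly increasing with no repeats) look like this from the user side; the domain path is illustrative:

    import numpy as np
    import h5pyd as h5py

    with h5py.File("/home/myuser/scratch/fancy.h5", "w") as f:
        dset = f.create_dataset("x", data=np.arange(100, dtype="i4").reshape(10, 10))
        block = dset[5:7, [2, 5, 6, 9]]   # slice plus coordinate list
        print(block.shape)                # (2, 4)
        # dset[5:7, [5, 2]] would raise TypeError: indices must increase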
Return is a 4-tuple with tuples for start, @@ -633,16 +629,16 @@ def _handle_simple(shape, args): start = [] count = [] - step = [] + step = [] scalar = [] for arg, length in zip(args, shape): if isinstance(arg, slice): - x,y,z = _translate_slice(arg, length) + x, y, z = _translate_slice(arg, length) s = False else: try: - x,y,z = _translate_int(int(arg), length) + x, y, z = _translate_int(int(arg), length) s = True except TypeError: raise TypeError(f'Illegal index "{arg}" (must be a slice or number)') @@ -653,28 +649,30 @@ def _handle_simple(shape, args): return tuple(start), tuple(count), tuple(step), tuple(scalar) + def _translate_int(exp, length): """ Given an integer index, return a 3-tuple (start, count, step) for hyperslab selection """ if exp < 0: - exp = length+exp + exp = length + exp - if not 0<=exp<length: - raise ValueError("Index (%s) out of range (0-%s)" % (exp, length-1)) + if not 0 <= exp < length: + raise ValueError("Index (%s) out of range (0-%s)" % (exp, length - 1)) return exp, 1, 1 + def _translate_slice(exp, length): """ Given a slice object, return a 3-tuple (start, count, step) for use with the hyperslab selection routines """ start, stop, step = exp.indices(length) - # Now if step > 0, then start and stop are in [0, length]; - # if step < 0, they are in [-1, length - 1] (Python 2.6b2 and later; - # Python issue 3004). + # Now if step > 0, then start and stop are in [0, length]; + # if step < 0, they are in [-1, length - 1] (Python 2.6b2 and later; + # Python issue 3004). if step < 1: raise ValueError("Step must be >= 1 (got %d)" % step) @@ -685,6 +683,7 @@ def _translate_slice(exp, length): return start, count, step + def guess_shape(sid): """ Given a dataspace, try to deduce the shape of the selection. @@ -703,8 +702,10 @@ def guess_shape(sid): elif sel_class == 'H5S_SCALAR': # NumPy has no way of expressing empty 0-rank selections, so we use None - if sel_type == H5S_SELECT_NONE: return None - if sel_type == H5S_SELECT_ALL: return tuple() + if sel_type == H5S_SELECT_NONE: + return None + if sel_type == H5S_SELECT_ALL: + return tuple() elif sel_class != 'H5S_SIMPLE': raise TypeError("Unrecognized dataspace class %s" % sel_class) @@ -715,7 +716,7 @@ def guess_shape(sid): rank = len(sid.shape) if sel_type == H5S_SELECT_NONE: - return (0,)*rank + return (0,) * rank elif sel_type == H5S_SELECT_ALL: return sid.shape @@ -731,7 +732,7 @@ def guess_shape(sid): # We have a hyperslab-based selection if N == 0: - return (0,)*rank + return (0,) * rank bottomcorner, topcorner = (np.array(x) for x in sid.get_select_bounds()) @@ -749,7 +750,7 @@ def get_n_axis(sid, axis): N_axis = N/N_leftover.
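A worked example of the _translate_slice/_translate_int contract above: both map a single-axis index onto a (start, count, step) triple for hyperslab selection. A small sketch mirroring that logic (the real functions also reject zero-length selections):

    def translate_slice(exp, length):
        # Normalize via slice.indices(), then count the selected points.
        start, stop, step = exp.indices(length)
        if step < 1:
            raise ValueError("Step must be >= 1 (got %d)" % step)
        count = 1 + (stop - start - 1) // step if stop > start else 0
        return start, count, step

    print(translate_slice(slice(None), 10))      # (0, 10, 1)
    print(translate_slice(slice(2, 9, 3), 10))   # (2, 3, 3) -> picks 2, 5, 8
    print(translate_slice(slice(-4, None), 10))  # (6, 4, 1)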
""" - if(boxshape[axis]) == 1: + if (boxshape[axis]) == 1: return 1 start = bottomcorner.copy() @@ -763,8 +764,7 @@ def get_n_axis(sid, axis): N_leftover = masked_sid.get_select_npoints() - return N//N_leftover - + return N // N_leftover shape = tuple(get_n_axis(sid, x) for x in range(rank)) @@ -776,8 +776,6 @@ def get_n_axis(sid, axis): return shape - - class ScalarSelection(Selection): """ @@ -788,8 +786,7 @@ class ScalarSelection(Selection): def mshape(self): return self._mshape - - def __init__(self, shape, *args, **kwds): + def __init__(self, shape, *args, **kwds): Selection.__init__(self, shape, *args, **kwds) arg = None if len(args) > 0: diff --git a/h5pyd/_hl/serverinfo.py b/h5pyd/_hl/serverinfo.py index 87c5d5b6..ff9f6024 100644 --- a/h5pyd/_hl/serverinfo.py +++ b/h5pyd/_hl/serverinfo.py @@ -51,7 +51,7 @@ def getServerInfo(endpoint=None, username=None, password=None, api_key=None, **k try: rsp = http_conn.GET("/about") break - except IOError as ioe: + except IOError: if connect_try < len(connect_backoff): time.sleep(connect_backoff[connect_try]) else: diff --git a/h5pyd/_hl/table.py b/h5pyd/_hl/table.py index ad82b5a5..bb78cef8 100644 --- a/h5pyd/_hl/table.py +++ b/h5pyd/_hl/table.py @@ -12,14 +12,14 @@ from __future__ import absolute_import import numpy -from .base import _decode +from .base import _decode from .base import bytesToArray from .dataset import Dataset from .objectid import DatasetID from . import selections as sel from .h5type import Reference from .h5type import check_dtype -from .h5type import getQueryDtype +from .h5type import getQueryDtype class Cursor(): @@ -64,17 +64,18 @@ def __iter__(self): if nrows - indx < read_count: read_count = nrows - indx if self._query is None: - arr = self._table[indx+self._start:read_count+indx+self._start] + arr = self._table[indx + self._start:read_count + indx + self._start] else: # call table to return query result if query_complete: arr = None # nothing more to fetch else: - arr = self._table.read_where(self._query, start=indx+self._start, limit=read_count) + arr = self._table.read_where(self._query, start=indx + self._start, limit=read_count) if arr is not None and arr.shape[0] < read_count: query_complete = True # we've gotten all the rows - if arr is not None and indx%self._buffer_rows < arr.shape[0]: - yield arr[indx%self._buffer_rows] + if arr is not None and indx % self._buffer_rows < arr.shape[0]: + yield arr[indx % self._buffer_rows] + class Table(Dataset): @@ -95,7 +96,6 @@ def __init__(self, bind): if len(self._shape) > 1: raise ValueError("Table must be one-dimensional") - @property def colnames(self): """Numpy-style attribute giving the number of dimensions""" @@ -118,7 +118,7 @@ def read(self, start=None, stop=None, step=None, field=None, out=None): step = 1 arr = self[start:stop:step] if field is not None: - #TBD - read just the field once the service supports it + # TBD - read just the field once the service supports it tmp = arr[field] arr = tmp if out is not None: @@ -127,12 +127,12 @@ def read(self, start=None, stop=None, step=None, field=None, out=None): else: return arr - - - def read_where(self, condition, condvars=None, field=None, start=None, stop=None, step=None, limit=0, include_index=True): + def read_where(self, condition, condvars=None, field=None, + start=None, stop=None, step=None, limit=0, include_index=True): """Read rows from table using pytable-style condition """ names = () # todo + def readtime_dtype(basetype, names): """ Make a NumPy dtype appropriate for reading """ @@ -143,7 +143,7 @@ def 
readtime_dtype(basetype, names): raise ValueError("Field names only allowed for compound types") for name in names: # Check all names are legal - if not name in basetype.names: + if name not in basetype.names: raise ValueError("Field %s does not appear in this type." % name) return numpy.dtype([(name, basetype.fields[name][0]) for name in names]) @@ -158,7 +158,6 @@ def readtime_dtype(basetype, names): # todo - will need the following once we have binary transfers # mtype = h5t.py_create(new_dtype) rsp_type = getQueryDtype(mtype) - # Perform the dataspace selection if start or stop: @@ -190,7 +189,7 @@ def readtime_dtype(basetype, names): if limit > 0: params["Limit"] = limit - total_rows self.log.info("req - cursor: {} page_size: {}".format(cursor, page_size)) - end_row = cursor+page_size + end_row = cursor + page_size if end_row > stop: end_row = stop selection_arg = slice(cursor, end_row) @@ -205,8 +204,8 @@ def readtime_dtype(basetype, names): rsp = self.GET(req, params=params) if isinstance(rsp, bytes): # binary response - arr = bytesToArray(rsp, rsp_type, None) - count = len(arr) + arr = bytesToArray(rsp, rsp_type, None) + count = len(arr) self.log.info(f"got {count} rows binary data") else: values = rsp["value"] @@ -229,7 +228,7 @@ def readtime_dtype(basetype, names): else: e = values[i] arr[i] = tuple(e) - + self.log.info("got {} rows".format(count)) total_rows += count data.append(arr) @@ -263,14 +262,13 @@ def readtime_dtype(basetype, names): start = 0 for arr in data: nrows = len(arr) - ret_arr[start:(start+nrows)] = arr[:] + ret_arr[start:(start + nrows)] = arr[:] start += nrows else: ret_arr = data[0] return ret_arr - def update_where(self, condition, value, start=None, stop=None, step=None, limit=None): """Modify rows in table using pytable-style condition """ @@ -317,13 +315,11 @@ def update_where(self, condition, value, start=None, stop=None, step=None, limit return arr - def create_cursor(self, condition=None, start=None, stop=None): + def create_cursor(self, condition=None, start=None, stop=None): """Return a cursor for iteration """ return Cursor(self, query=condition, start=start, stop=stop) - - def append(self, rows): """ Append rows to end of table """ @@ -344,7 +340,7 @@ def append(self, rows): try: val_dtype = val.dtype except AttributeError: - pass # not a numpy object, just leave dtype as None + pass # not a numpy object, just leave dtype as None if isinstance(val, Reference): # h5pyd References are just strings @@ -400,7 +396,6 @@ def append(self, rows): params = {} body = {} - format = "json" if use_base64: diff --git a/h5pyd/config.py b/h5pyd/config.py index 5edf4c5e..504dfcbd 100755 --- a/h5pyd/config.py +++ b/h5pyd/config.py @@ -12,6 +12,7 @@ import os import json + class Config: """ User Config state @@ -71,7 +72,7 @@ def __delitem__(self, name): def __len__(self): return len(self._cfg) - + def __iter__(self): """ Iterate over config names """ keys = self._cfg.keys() @@ -86,11 +87,3 @@ def __repr__(self): def keys(self): return self._cfg.keys() - - - - - - - - diff --git a/h5pyd/h5ds.py b/h5pyd/h5ds.py index c50137dd..a7e08327 100644 --- a/h5pyd/h5ds.py +++ b/h5pyd/h5ds.py @@ -72,7 +72,7 @@ def is_attached(dsetid: DatasetID, dscaleid: DatasetID, idx: int) -> bool: dimlist = _getAttributeJson("DIMENSION_LIST", dsetid) reflist = _getAttributeJson("REFERENCE_LIST", dscaleid) try: - return ([f"datasets/{dsetid.id}", idx] in reflist["value"] and - f"datasets/{dscaleid.id}" in dimlist["value"][idx]) + return ([f"datasets/{dsetid.id}", idx] in + reflist["value"] 
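Taken together, the Table hunks above (Cursor buffering, read_where paging via the Limit parameter, append) support a usage pattern like the following sketch. The domain path and field names are made up, and it assumes Table is importable from the package root and can wrap an existing 1-D compound dataset by its id, as the bind-style constructor above suggests:

    import numpy as np
    import h5pyd
    from h5pyd import Table

    dt = np.dtype([("symbol", "S4"), ("price", "f4")])

    with h5pyd.File("/home/myuser/scratch/quotes.h5", "w") as f:
        dset = f.create_dataset("quotes", (0,), maxshape=(None,), dtype=dt)
        table = Table(dset.id)  # wrap the 1-D compound dataset as a Table
        table.append([(b"AAPL", 177.5), (b"MSFT", 402.1), (b"AAPL", 178.2)])
        # Server-side query, paged as in read_where above; limit caps the rows.
        rows = table.read_where("symbol == b'AAPL'", limit=10)
        print(len(rows))
        # Cursor iteration fetches rows in buffered blocks, not one per request:
        for row in table.create_cursor(condition="price > 200"):
            print(row)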
and f"datasets/{dscaleid.id}" in dimlist["value"][idx]) except (KeyError, IndexError): return False diff --git a/h5pyd/version.py b/h5pyd/version.py index df918605..e9a9c882 100644 --- a/h5pyd/version.py +++ b/h5pyd/version.py @@ -12,7 +12,7 @@ from __future__ import absolute_import -from distutils.version import StrictVersion as _sv +from packaging.version import Version, parse import sys import numpy @@ -20,11 +20,11 @@ hdf5_version = "REST" -_exp = _sv(version) +_exp = parse(version) -version_tuple = _exp.version + ( - ("".join(str(x) for x in _exp.prerelease),) - if _exp.prerelease is not None +version_tuple = _exp._version + ( + ("".join(str(x) for x in _exp.pre),) + if _exp.is_prerelease else ("",) ) diff --git a/setup.cfg b/setup.cfg index aa3a0214..b2f3e822 100644 --- a/setup.cfg +++ b/setup.cfg @@ -2,4 +2,5 @@ max-line-length = 120 # E402: module level import not at top of file # C901: too complex -ignore = E402, C901 +# F401: unused exports are necessary in __init__.py +ignore = E402, C901, F401 diff --git a/setup.py b/setup.py index 7aaf4e26..9f0bd5bb 100644 --- a/setup.py +++ b/setup.py @@ -36,6 +36,7 @@ "requests_unixsocket", "pytz", "pyjwt", + "packaging" # "cryptography", ], setup_requires=["pkgconfig"], @@ -43,7 +44,7 @@ "azure": [ "msrestazure", "adal" - ], + ], "google": [ "google-api-python-client", "google-auth-oauthlib", diff --git a/test/apps/common.py b/test/apps/common.py index fc88db16..3ef97a50 100644 --- a/test/apps/common.py +++ b/test/apps/common.py @@ -50,7 +50,7 @@ def test_user1(self): # HS_USERNAME is the username h5pyd will look up if # if not provided in the File constructor user1 = {} - if "HS_USERNAME" in os.environ: + if "HS_USERNAME" in os.environ: user1["name"] = os.environ["HS_USERNAME"] else: user1["name"] = "test_user1" @@ -64,7 +64,7 @@ def test_user1(self): @property def test_user2(self): user2 = {} - if "TEST2_USERNAME" in os.environ: + if "TEST2_USERNAME" in os.environ: user2["name"] = os.environ["TEST2_USERNAME"] else: user2["name"] = "test_user2" @@ -86,22 +86,22 @@ def use_h5py(): @classmethod def setUpClass(cls): pass - #cls.tempdir = tempfile.mkdtemp(prefix='h5py-test_') + # cls.tempdir = tempfile.mkdtemp(prefix='h5py-test_') @classmethod def tearDownClass(cls): pass - #shutil.rmtree(cls.tempdir) + # shutil.rmtree(cls.tempdir) def setUp(self): self.test_dir = str(int(time.time())) - #self.f = h5py.File(self.mktemp(), 'w') + # self.f = h5py.File(self.mktemp(), 'w') def tearDown(self): try: if self.f: self.f.close() - except: + except Exception: pass if not hasattr(ut.TestCase, 'assertSameElements'): @@ -150,11 +150,11 @@ def assertArrayEqual(self, dset, arr, message=None, precision=None): self.assertTrue( dset.shape == arr.shape, "Shape mismatch (%s vs %s)%s" % (dset.shape, arr.shape, message) - ) + ) self.assertTrue( dset.dtype == arr.dtype, "Dtype mismatch (%s vs %s)%s" % (dset.dtype, arr.dtype, message) - ) + ) if arr.dtype.names is not None: for n in arr.dtype.names: @@ -164,12 +164,12 @@ def assertArrayEqual(self, dset, arr, message=None, precision=None): self.assertTrue( np.all(np.abs(dset[...] - arr[...]) < precision), "Arrays differ by more than %.3f%s" % (precision, message) - ) + ) else: self.assertTrue( np.all(dset[...] == arr[...]), "Arrays are not equal (dtype %s) %s" % (arr.dtype.str, message) - ) + ) def assertNumpyBehavior(self, dset, arr, s): """ Apply slicing arguments "s" to both dset and arr. 
@@ -210,7 +210,6 @@ def getFileName(self, basename): filename += ".h5" return filename - def getPathFromDomain(self, domain): """ Convert DNS-style domain name to filepath @@ -226,9 +225,7 @@ def getPathFromDomain(self, domain): path = '/' for name in names: if name: - path += name - path += '/' + path += name + path += '/' path = path[:-1] # strip trailing slash return path - - diff --git a/test/apps/config.py b/test/apps/config.py index 52f242f5..6be930ab 100755 --- a/test/apps/config.py +++ b/test/apps/config.py @@ -11,9 +11,10 @@ ############################################################################## import os + def get_test_filenames(): - return ("tall.h5", - "shuffle_compress.h5", + return ("tall.h5", + "shuffle_compress.h5", "d_objref.h5", "a_objref.h5", "d_link_hard.h5", @@ -22,13 +23,15 @@ def get_test_filenames(): "fletcher32.h5", "undecodable_attr.h5", "diamond.h5" - ) + ) + cfg = { 'use_h5py': False, 'test_file_http_path': "https://s3.amazonaws.com/hdfgroup/data/hdf5test/" } + def get(x): # see if there is a command-line override config_value = None @@ -47,6 +50,3 @@ def get(x): elif config_value.upper() in ('F', 'FALSE'): config_value = False return config_value - - - diff --git a/test/apps/is_hsds.py b/test/apps/is_hsds.py index 42980269..a2e1c365 100644 --- a/test/apps/is_hsds.py +++ b/test/apps/is_hsds.py @@ -33,8 +33,3 @@ h5pyd.Folder(folder_path) # will trigger error with h5serv except Exception: sys.exit("Server doesn't support Folder objects") - - - - - diff --git a/test/apps/load_files.py b/test/apps/load_files.py index 30c4a8a6..2c589342 100644 --- a/test/apps/load_files.py +++ b/test/apps/load_files.py @@ -36,7 +36,7 @@ out_dir = "out" test_file_http_path = config.get("test_file_http_path") -parent = h5pyd.Folder(test_folder) +parent = h5pyd.Folder(test_folder) filenames = config.get_test_filenames() if not os.path.exists(data_dir): @@ -73,4 +73,3 @@ if rc != 0: sys.exit(f"Failed to hsget {filename}") print("load_files done") - \ No newline at end of file diff --git a/test/apps/test_diamond_inspect.py b/test/apps/test_diamond_inspect.py index bff1812a..8c179464 100644 --- a/test/apps/test_diamond_inspect.py +++ b/test/apps/test_diamond_inspect.py @@ -22,9 +22,10 @@ from common import ut, TestCase + def get_filename(): if config.get("use_h5py"): - dirpath = "data" + dirpath = "data" else: dirpath = config.get("H5PYD_TEST_FOLDER") filename = os.path.join(dirpath, "diamond.h5") @@ -34,12 +35,13 @@ def get_filename(): class TestDiamondInspect(TestCase): def setUp(self): - + filename = get_filename() self.f = h5py.File(filename, "r") def test_obj_count(self): counts = {"groups": 0, "datasets": 0} + def visit(name): obj = self.f[name] if isinstance(obj, h5py.Dataset): @@ -68,10 +70,10 @@ def test_links(self): g2_dset = g1["dset"] self.assertEqual(g1_dset.id.id, g2_dset.id.id) + if __name__ == '__main__': print("filename:", get_filename()) loglevel = logging.ERROR logging.basicConfig(format='%(asctime)s %(message)s', level=loglevel) - - ut.main() + ut.main() diff --git a/test/apps/test_hsinfo.py b/test/apps/test_hsinfo.py index 03b05ff3..bf847468 100644 --- a/test/apps/test_hsinfo.py +++ b/test/apps/test_hsinfo.py @@ -18,15 +18,13 @@ class TestHsinfo(TestCase): - def test_help(self): - arg = "-h" result = subprocess.check_output(["python", "../../h5pyd/_apps/hsinfo.py", "-h"]) self.assertTrue(len(result) > 400) + if __name__ == '__main__': loglevel = logging.ERROR logging.basicConfig(format='%(asctime)s %(message)s', level=loglevel) ut.main() - diff --git 
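The getPathFromDomain hunks above only touch indentation, but the conversion itself is worth spelling out: a DNS-style domain is reversed, component by component, into a POSIX-style path. A sketch mirroring that logic (the reversal happens just before the loop shown in the hunk):

    def path_from_domain(domain):
        # Convert "tall.test.hdfgroup.org" -> "/org/hdfgroup/test/tall";
        # values that already contain a slash are treated as paths.
        if domain.find('/') > -1:
            return domain
        names = domain.split('.')
        names.reverse()
        path = '/'
        for name in names:
            if name:
                path += name + '/'
        return path[:-1]  # strip trailing slash

    print(path_from_domain("tall.test.hdfgroup.org"))  # /org/hdfgroup/test/tall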
a/test/apps/test_shuffle_inspect.py b/test/apps/test_shuffle_inspect.py index 95e25845..badcc846 100644 --- a/test/apps/test_shuffle_inspect.py +++ b/test/apps/test_shuffle_inspect.py @@ -22,9 +22,10 @@ from common import ut, TestCase + def get_filename(): if config.get("use_h5py"): - dirpath = "data" + dirpath = "data" else: dirpath = config.get("H5PYD_TEST_FOLDER") filename = os.path.join(dirpath, "shuffle_compress.h5") @@ -34,11 +35,10 @@ def get_filename(): class TestShuffleInspect(TestCase): def setUp(self): - + filename = get_filename() self.f = h5py.File(filename, "r") - def test_dset(self): self.assertEqual(len(self.f), 1) self.assertTrue("dset" in self.f) @@ -47,13 +47,13 @@ def test_dset(self): self.assertTrue(dset.shuffle) self.assertEqual(dset.shape, (100,)) arr = dset[0:10] - for i in range(10): + for i in range(10): self.assertEqual(arr[i], i) + if __name__ == '__main__': print("filename:", get_filename()) loglevel = logging.ERROR logging.basicConfig(format='%(asctime)s %(message)s', level=loglevel) - - ut.main() + ut.main() diff --git a/test/apps/test_tall_inspect.py b/test/apps/test_tall_inspect.py index 82462fc2..039242fb 100644 --- a/test/apps/test_tall_inspect.py +++ b/test/apps/test_tall_inspect.py @@ -22,9 +22,10 @@ from common import ut, TestCase + def get_filename(): if config.get("use_h5py"): - dirpath = "data" + dirpath = "data" else: dirpath = config.get("H5PYD_TEST_FOLDER") filename = os.path.join(dirpath, "tall.h5") @@ -34,12 +35,12 @@ def get_filename(): class TestTallInspect(TestCase): def setUp(self): - filename = get_filename() self.f = h5py.File(filename, "r") def test_obj_count(self): counts = {"groups": 0, "datasets": 0} + def visit(name): obj = self.f[name] if isinstance(obj, h5py.Dataset): @@ -64,10 +65,10 @@ def test_attributes(self): self.assertTrue("attr2" in self.f.attrs) attr2 = self.f.attrs["attr2"] self.assertEqual(attr2.dtype, np.dtype(">i4")) - self.assertEqual(attr2.shape, (2,2)) + self.assertEqual(attr2.shape, (2, 2)) for i in range(2): for j in range(2): - self.assertEqual(attr2[i,j], 2*i+j) + self.assertEqual(attr2[i, j], 2 * i + j) dset111 = self.f["/g1/g1.1/dset1.1.1"] self.assertEqual(2, len(dset111.attrs)) self.assertTrue("attr1" in dset111.attrs) @@ -94,10 +95,10 @@ def test_dataset(self): for i in range(20): self.assertEqual(arr[i], i) + if __name__ == '__main__': print("filename:", get_filename()) loglevel = logging.ERROR logging.basicConfig(format='%(asctime)s %(message)s', level=loglevel) - - ut.main() + ut.main() diff --git a/test/hl/common.py b/test/hl/common.py index 92544eda..5d3f4e29 100644 --- a/test/hl/common.py +++ b/test/hl/common.py @@ -60,7 +60,7 @@ def test_user1(self): # HS_USERNAME is the username h5pyd will look up if # if not provided in the File constructor user1 = {} - if "HS_USERNAME" in os.environ: + if "HS_USERNAME" in os.environ: user1["name"] = os.environ["HS_USERNAME"] else: user1["name"] = "test_user1" @@ -74,7 +74,7 @@ def test_user1(self): @property def test_user2(self): user2 = {} - if "TEST12_USERNAME" in os.environ: + if "TEST12_USERNAME" in os.environ: user2["name"] = os.environ["TEST2_USERNAME"] else: user2["name"] = "test_user2" @@ -96,22 +96,22 @@ def use_h5py(): @classmethod def setUpClass(cls): pass - #cls.tempdir = tempfile.mkdtemp(prefix='h5py-test_') + # cls.tempdir = tempfile.mkdtemp(prefix='h5py-test_') @classmethod def tearDownClass(cls): pass - #shutil.rmtree(cls.tempdir) + # shutil.rmtree(cls.tempdir) def setUp(self): self.test_dir = str(int(time.time())) - #self.f = 
h5py.File(self.mktemp(), 'w') + # self.f = h5py.File(self.mktemp(), 'w') def tearDown(self): try: if self.f: self.f.close() - except: + except Exception: pass if not hasattr(ut.TestCase, 'assertSameElements'): @@ -160,11 +160,11 @@ def assertArrayEqual(self, dset, arr, message=None, precision=None): self.assertTrue( dset.shape == arr.shape, "Shape mismatch (%s vs %s)%s" % (dset.shape, arr.shape, message) - ) + ) self.assertTrue( dset.dtype == arr.dtype, "Dtype mismatch (%s vs %s)%s" % (dset.dtype, arr.dtype, message) - ) + ) if arr.dtype.names is not None: for n in arr.dtype.names: @@ -174,12 +174,12 @@ def assertArrayEqual(self, dset, arr, message=None, precision=None): self.assertTrue( np.all(np.abs(dset[...] - arr[...]) < precision), "Arrays differ by more than %.3f%s" % (precision, message) - ) + ) else: self.assertTrue( np.all(dset[...] == arr[...]), "Arrays are not equal (dtype %s) %s" % (arr.dtype.str, message) - ) + ) def assertNumpyBehavior(self, dset, arr, s): """ Apply slicing arguments "s" to both dset and arr. @@ -220,7 +220,6 @@ def getFileName(self, basename): filename += ".h5" return filename - def getPathFromDomain(self, domain): """ Convert DNS-style domain name to filepath @@ -236,8 +235,8 @@ def getPathFromDomain(self, domain): path = '/' for name in names: if name: - path += name - path += '/' + path += name + path += '/' path = path[:-1] # strip trailing slash return path diff --git a/test/hl/config.py b/test/hl/config.py index 32787503..b028161e 100755 --- a/test/hl/config.py +++ b/test/hl/config.py @@ -34,6 +34,3 @@ def get(x): elif config_value.upper() in ('F', 'FALSE'): config_value = False return config_value - - - diff --git a/test/hl/test_attribute.py b/test/hl/test_attribute.py index 3b20bf9d..a0388fe8 100644 --- a/test/hl/test_attribute.py +++ b/test/hl/test_attribute.py @@ -25,14 +25,13 @@ class TestAttribute(TestCase): - def test_create(self): filename = self.getFileName("create_attribute") print("filename:", filename) f = h5py.File(filename, 'w') g1 = f.create_group('g1') - + g1.attrs['a1'] = 42 n = g1.attrs['a1'] @@ -96,21 +95,21 @@ def test_create(self): self.assertEqual(arr[0], b"Hello") self.assertEqual(arr[1], b"Good-bye") self.assertEqual(arr.dtype.kind, 'S') - + # scalar byte values g1.attrs['e1'] = "Hello" s = g1.attrs['e1'] - self.assertEqual(s, "Hello" ) + self.assertEqual(s, "Hello") # scalar objref attribute - g11 = g1.create_group('g1.1') # create subgroup g1/g1.1 + g11 = g1.create_group('g1.1') # create subgroup g1/g1.1 g11.attrs['name'] = 'g1.1' # tag group with an attribute g11_ref = g11.ref # get ref to g1/g1.1 self.assertTrue(isinstance(g11_ref, h5py.Reference)) refdt = h5py.special_dtype(ref=h5py.Reference) # create ref dtype g1.attrs.create('f1', g11_ref, dtype=refdt) # create attribute with ref to g1.1 - ref = g1.attrs['f1'] # read back the attribute + ref = g1.attrs['f1'] # read back the attribute refobj = f[ref] # get the ref'd object self.assertTrue('name' in refobj.attrs) # should see the tag attribute diff --git a/test/hl/test_committedtype.py b/test/hl/test_committedtype.py index f46cbff8..75333685 100644 --- a/test/hl/test_committedtype.py +++ b/test/hl/test_committedtype.py @@ -34,8 +34,10 @@ def test_createtype(self): self.assertEqual(len(ctype.dtype), len(dt)) ctype.attrs["attr1"] = "this is a named datatype" dset = f.create_dataset('complex_dset', (10,), dtype=f['complex_type']) + self.assertNotEqual(dset, None) f.close() + if __name__ == '__main__': loglevel = logging.ERROR logging.basicConfig(format='%(asctime)s 
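The committed-type test above exercises this pattern: assign a NumPy dtype to a name to commit it as a named datatype, then reference the named type when creating datasets. A compact sketch with an illustrative domain path:

    import numpy as np
    import h5pyd as h5py

    with h5py.File("/home/myuser/scratch/ctype.h5", "w") as f:
        dt = np.dtype([("real", "f8"), ("img", "f8")])
        f["complex_type"] = dt                   # commit the datatype
        ctype = f["complex_type"]
        ctype.attrs["attr1"] = "this is a named datatype"
        dset = f.create_dataset("complex_dset", (10,), dtype=f["complex_type"])
        print(dset.dtype)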
%(message)s', level=loglevel) diff --git a/test/hl/test_dataset.py b/test/hl/test_dataset.py index f1435c84..7c5c96b2 100644 --- a/test/hl/test_dataset.py +++ b/test/hl/test_dataset.py @@ -36,6 +36,7 @@ from h5pyd import File, Dataset import h5pyd as h5py + def is_empty_dataspace(obj): shape_json = obj.shape_json if "class" not in shape_json: @@ -143,6 +144,7 @@ def test_name_bytes(self): dset2 = self.f.create_dataset(b'bar/baz', (2,)) self.assertEqual(dset2.shape, (2,)) + class TestCreateData(BaseDataset): """ @@ -235,7 +237,7 @@ def test_empty_create_via_Empty_class(self): def test_create_incompatible_data(self): # Shape tuple is incompatible with data with self.assertRaises(ValueError): - self.f.create_dataset('bar', shape=4, data= np.arange(3)) + self.f.create_dataset('bar', shape=4, data=np.arange(3)) class TestReadDirectly(BaseDataset): @@ -244,12 +246,12 @@ class TestReadDirectly(BaseDataset): Feature: Read data directly from Dataset into a Numpy array """ - source_shapes = ((100,), (70,), (30, 10), (5, 7, 9)) - dest_shapes = ((100,), (100,), (20, 20), (6,)) + source_shapes = ((100,), (70,), (30, 10), (5, 7, 9)) + dest_shapes = ((100,), (100,), (20, 20), (6,)) source_sels = (np.s_[0:10], np.s_[50:60], np.s_[:20, :], np.s_[2, :6, 3]) - dest_sels = (np.s_[50:60], np.s_[90:], np.s_[:, :10], np.s_[:]) - - def test_read_direct(self): + dest_sels = (np.s_[50:60], np.s_[90:], np.s_[:, :10], np.s_[:]) + + def test_read_direct(self): for i in range(len(self.source_shapes)): source_shape = self.source_shapes[i] dest_shape = self.dest_shapes[i] @@ -287,17 +289,18 @@ def test_not_c_contiguous(self): with self.assertRaises(TypeError): dset.read_direct(arr) + class TestWriteDirectly(BaseDataset): """ Feature: Write Numpy array directly into Dataset """ - source_shapes = ((100,), (70,), (30, 10), (5, 7, 9)) - dest_shapes = ((100,), (100,), (20, 20), (6,)) + source_shapes = ((100,), (70,), (30, 10), (5, 7, 9)) + dest_shapes = ((100,), (100,), (20, 20), (6,)) source_sels = (np.s_[0:10], np.s_[50:60], np.s_[:20, :], np.s_[2, :6, 3]) - dest_sels = (np.s_[50:60], np.s_[90:], np.s_[:, :10], np.s_[:]) - + dest_sels = (np.s_[50:60], np.s_[90:], np.s_[:, :10], np.s_[:]) + def test_write_direct(self): count = len(self.source_shapes) for i in range(count): @@ -335,7 +338,6 @@ def test_no_selection(self): dset.write_direct(arr) - class TestCreateRequire(BaseDataset): """ @@ -413,13 +415,13 @@ class TestCreateChunked(BaseDataset): def test_create_chunks(self): """ Create via chunks tuple """ - dset = self.f.create_dataset('foo', shape=(1024*1024,), chunks=(1024*1024,), dtype='i4') - self.assertEqual(dset.chunks, (1024*1024,)) + dset = self.f.create_dataset('foo', shape=(1024 * 1024,), chunks=(1024 * 1024,), dtype='i4') + self.assertEqual(dset.chunks, (1024 * 1024,)) def test_create_chunks_integer(self): """ Create via chunks integer """ - dset = self.f.create_dataset('foo', shape=(1024*1024,), chunks=1024*1024, dtype='i4') - self.assertEqual(dset.chunks, (1024*1024,)) + dset = self.f.create_dataset('foo', shape=(1024 * 1024,), chunks=1024 * 1024, dtype='i4') + self.assertEqual(dset.chunks, (1024 * 1024,)) def test_chunks_mismatch(self): """ Illegal chunk size raises ValueError """ @@ -507,7 +509,7 @@ def test_exc(self): """ Bogus fill value raises ValueError """ with self.assertRaises(ValueError): self.f.create_dataset('foo', (10,), - dtype=[('a', 'i'), ('b', 'f')], fillvalue=42) + dtype=[('a', 'i'), ('b', 'f')], fillvalue=42) class TestCreateNamedType(BaseDataset): @@ -528,12 +530,12 @@ def test_named(self): 
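The TestReadDirectly/TestWriteDirectly tables above pair source and destination selections; in user code that looks like the following sketch (illustrative domain path):

    import numpy as np
    import h5pyd as h5py

    with h5py.File("/home/myuser/scratch/direct.h5", "w") as f:
        dset = f.create_dataset("x", data=np.arange(100, dtype="f4"))
        out = np.zeros(100, dtype="f4")
        # Copy dset[0:10] into out[50:60] without an intermediate array.
        dset.read_direct(out, source_sel=np.s_[0:10], dest_sel=np.s_[50:60])
        # write_direct goes the other way: array -> dataset.
        dset.write_direct(out, source_sel=np.s_[50:60], dest_sel=np.s_[90:100])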
else: # h5py ref_type = self.f['type'].id - + self.assertEqual(dset_type, ref_type) if isinstance(dset.id.id, str): # h5pyd - pass # TBD: don't support committed method + pass # TBD: don't support committed method else: self.assertTrue(dset.id.get_type().committed()) @@ -616,7 +618,6 @@ def test_compression_number_invalid(self): self.f.create_dataset('foo', (20, 30), compression=100) self.assertIn("Unknown compression", str(e.exception)) - original_compression_vals = h5py._hl.dataset._LEGACY_GZIP_COMPRESSION_VALS try: h5py._hl.dataset._LEGACY_GZIP_COMPRESSION_VALS = tuple() @@ -644,7 +645,7 @@ def test_lzf(self): # use lz4 instead of lzf for HSDS compression = "lz4" else: - compression = "lzf" + compression = "lzf" dset = self.f.create_dataset('foo', (20, 30), compression=compression) self.assertEqual(dset.compression, compression) self.assertEqual(dset.compression_opts, None) @@ -659,7 +660,7 @@ def test_lzf(self): readdata = self.f['bar'][()] self.assertArrayEqual(readdata, testdata) - @ut.skip + @ut.skip def test_lzf_exc(self): """ Giving lzf options raises ValueError """ with self.assertRaises(ValueError): @@ -681,9 +682,9 @@ def test_szip(self): compressors = h5py.filters.encode if "szip" in compressors: self.f.create_dataset('foo', (20, 30), compression='szip', - compression_opts=('ec', 16)) + compression_opts=('ec', 16)) else: - pass # szip not supported + pass # szip not supported class TestCreateShuffle(BaseDataset): @@ -765,7 +766,7 @@ def test_float(self): assert (readdata == testdata).all() else: assert not (readdata == testdata).all() - + @ut.expectedFailure def test_int(self): """ Scaleoffset filter works for integer data with default precision """ @@ -852,11 +853,13 @@ def test_contents(self): # create a dataset in an external file and set it ext_file = self.mktemp() - external = [(ext_file, 0, h5f.UNLIMITED)] - dset = self.f.create_dataset('foo', shape, dtype=testdata.dtype, external=external) - dset[...] = testdata + # TBD: h5f undefined + # external = [(ext_file, 0, h5f.UNLIMITED)] + # TBD: external undefined + # dset = self.f.create_dataset('foo', shape, dtype=testdata.dtype, external=external) + # dset[...] 
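Pulling the chunk and compression options above together, a typical creation call looks like the sketch below; note the lz4-for-lzf substitution the test makes when running against HSDS (illustrative domain path):

    import numpy as np
    import h5pyd as h5py

    with h5py.File("/home/myuser/scratch/compress.h5", "w") as f:
        dset = f.create_dataset("foo", shape=(1024, 1024), dtype="i4",
                                chunks=(256, 256), compression="gzip",
                                compression_opts=9, shuffle=True)
        dset[...] = np.random.randint(0, 100, size=(1024, 1024)).astype("i4")
        print(dset.chunks, dset.compression, dset.compression_opts)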
= testdata - assert dset.external is not None + # assert dset.external is not None # verify file's existence, size, and contents with open(ext_file, 'rb') as fid: @@ -892,10 +895,12 @@ def test_invalid(self): for exc_type, external in [ (TypeError, [ext_file]), (TypeError, [ext_file, 0]), - (TypeError, [ext_file, 0, h5f.UNLIMITED]), + # TBD: h5f undefined + # (TypeError, [ext_file, 0, h5f.UNLIMITED]), (ValueError, [(ext_file,)]), (ValueError, [(ext_file, 0)]), - (ValueError, [(ext_file, 0, h5f.UNLIMITED, 0)]), + # TBD: h5f undefined + # (ValueError, [(ext_file, 0, h5f.UNLIMITED, 0)]), (TypeError, [(ext_file, 0, "h5f.UNLIMITED")]), ]: with self.assertRaises(exc_type): @@ -967,7 +972,6 @@ def get_object_mtime(self, obj): mtime = h5py.h5g.get_objinfo(obj._id).mtime return mtime - def test_no_chunks(self): self.f['lol'] = np.arange(25).reshape(5, 5) self.f.create_dataset_like('like_lol', self.f['lol']) @@ -1010,6 +1014,7 @@ def test_maxshape(self): self.assertEqual(similar.shape, (10,)) self.assertEqual(similar.maxshape, (20,)) + class TestChunkIterator(BaseDataset): def test_no_chunks(self): dset = self.f.create_dataset("foo", ()) @@ -1017,17 +1022,16 @@ def test_no_chunks(self): dset.iter_chunks() def test_1d(self): - dset = self.f.create_dataset("foo", (4096*4096,), dtype='i4', chunks=(1024*1024,)) + dset = self.f.create_dataset("foo", (4096 * 4096,), dtype='i4', chunks=(1024 * 1024,)) count = 0 for s in dset.iter_chunks(): self.assertEqual(len(s), 1) self.assertTrue(isinstance(s[0], slice)) count += 1 self.assertTrue(count > 1) - def test_2d(self): - dset = self.f.create_dataset("foo", (4096,4096), dtype='i4', chunks=(1024,1024)) + dset = self.f.create_dataset("foo", (4096, 4096), dtype='i4', chunks=(1024, 1024)) count = 0 for s in dset.iter_chunks(): self.assertEqual(len(s), 2) @@ -1035,7 +1039,7 @@ def test_2d(self): self.assertTrue(isinstance(s[i], slice)) count += 1 self.assertTrue(count > 1) - + class TestResize(BaseDataset): @@ -1080,7 +1084,7 @@ def test_resize_over(self): with self.assertRaises(Exception): dset.resize((20, 70)) - @ut.skip + @ut.skip def test_resize_nonchunked(self): """ Resizing non-chunked dataset raises TypeError """ # Skipping since all datasets are chunked in HSDS @@ -1174,7 +1178,7 @@ def test_vlen_bytes(self): """ Vlen bytes dataset maps to vlen ascii in the file """ dt = h5py.string_dtype(encoding='ascii') ds = self.f.create_dataset('x', (100,), dtype=dt) - type_json= ds.id.type_json + type_json = ds.id.type_json self.assertEqual(type_json["class"], 'H5T_STRING') self.assertEqual(type_json['charSet'], 'H5T_CSET_ASCII') string_info = h5py.check_string_dtype(ds.dtype) @@ -1280,7 +1284,6 @@ def test_asstr(self): # len of ds self.assertEqual(10, len(ds.asstr())) - # Array output np.testing.assert_array_equal( ds.asstr()[:1], np.array([data], dtype=object) @@ -1349,13 +1352,13 @@ class TestCompound(BaseDataset): def test_rt(self): """ Compound types are read back in correct order (issue 236)""" - dt = np.dtype([ ('weight', np.float64), - ('cputime', np.float64), - ('walltime', np.float64), - ('parents_offset', np.uint32), - ('n_parents', np.uint32), - ('status', np.uint8), - ('endpoint_type', np.uint8), ]) + dt = np.dtype([('weight', np.float64), + ('cputime', np.float64), + ('walltime', np.float64), + ('parents_offset', np.uint32), + ('n_parents', np.uint32), + ('status', np.uint8), + ('endpoint_type', np.uint8), ]) testdata = np.ndarray((16,), dtype=dt) for key in dt.fields: @@ -1369,8 +1372,8 @@ def test_rt(self): @ut.expectedFailure def test_assign(self): # 
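iter_chunks, tested above, yields one slice tuple per stored chunk, which allows chunk-aligned processing without reading the whole dataset into memory; a short sketch (illustrative domain path):

    import h5pyd as h5py

    with h5py.File("/home/myuser/scratch/chunks.h5", "w") as f:
        dset = f.create_dataset("foo", shape=(2048, 2048), dtype="i4",
                                chunks=(512, 512))
        for s in dset.iter_chunks():   # s is a tuple of slices, one per axis
            dset[s] = 1
        print(sum(int(dset[s].sum()) for s in dset.iter_chunks()))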
TBD: field assignment not working - dt = np.dtype([ ('weight', (np.float64, 3)), - ('endpoint_type', np.uint8), ]) + dt = np.dtype([('weight', (np.float64, 3)), + ('endpoint_type', np.uint8), ]) testdata = np.ndarray((16,), dtype=dt) for key in dt.fields: @@ -1415,7 +1418,7 @@ def test_fields(self): @ut.expectedFailure class TestSubarray(BaseDataset): - #TBD: Fix subarray + # TBD: Fix subarray def test_write_list(self): ds = self.f.create_dataset("a", (1,), dtype="3int8") ds[0] = [1, 2, 3] @@ -1497,7 +1500,7 @@ def get_object_mtime(self, obj): def test_disable_track_times(self): """ check that when track_times=False, the time stamp=0 (Jan 1, 1970) """ ds = self.f.create_dataset('foo', (4,), track_times=False) - ds_mtime = self.get_object_mtime(ds) + ds_mtime = self.get_object_mtime(ds) if self.is_hsds(): # mod time is always tracked in HSDS self.assertTrue(ds_mtime > 0) @@ -1538,6 +1541,7 @@ def test_reading(self): self.assertEqual(ds[()].shape, arr.shape) self.assertEqual(ds[()].dtype, arr.dtype) + @ut.skip("RegionRefs not supported") class TestRegionRefs(BaseDataset): @@ -1671,7 +1675,7 @@ def test_convert(self): def test_multidim(self): dt = h5py.vlen_dtype(int) ds = self.f.create_dataset('vlen', (2, 2), dtype=dt) - #ds[0, 0] = np.arange(1) + # ds[0, 0] = np.arange(1) ds[:, :] = np.array([[np.arange(3), np.arange(2)], [np.arange(1), np.arange(2)]], dtype=object) ds[:, :] = np.array([[np.arange(2), np.arange(2)], @@ -1757,6 +1761,7 @@ def test_non_contiguous_arrays(self): assert all(self.f['nc2'][0] == y[::2]), f"{self.f['nc2'][0]} != {y[::2]}" + @ut.skip("low-level api not supported") class TestLowOpen(BaseDataset): @@ -1801,13 +1806,11 @@ def test_get_chunk_details(self): assert si.byte_offset is not None assert si.size > 0 - def test_empty_shape(self): ds = self.f.create_dataset('empty', dtype='int32') assert ds.shape is None assert ds.maxshape is None - def test_zero_storage_size(self): # https://github.com/h5py/h5py/issues/1475 from io import BytesIO @@ -1850,7 +1853,6 @@ def test_vlen_nullterm(self): with File(data_file_path) as f: assert f["ds1"][0] == b"2009-12-20T10:16:18.662409Z" - def test_allow_unknown_filter(self): # apparently 256-511 are reserved for testing purposes fake_filter_id = 256 @@ -1873,7 +1875,7 @@ def test_numpy_commutative(self,): Check that it returns symmetric response to == and != """ # TBD: investigate - shape = (100,1) + shape = (100, 1) dset = self.f.create_dataset("test", shape, dtype=float, data=np.random.rand(*shape)) # grab a value from the elements, ie dset[0] @@ -1886,7 +1888,7 @@ def test_numpy_commutative(self,): # generate sample not in the dset, ie max(dset)+delta # check that mask arrays are commutative wrt ==, != delta = 0.001 - nval = np.nanmax(dset)+delta + nval = np.nanmax(dset) + delta assert np.all((nval == dset) == (dset == nval)) assert np.all((nval != dset) == (dset != nval)) @@ -1897,7 +1899,7 @@ def test_basetype_commutative(self,): Check that operation is symmetric, even if it is potentially not meaningful. 
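The compound-type roundtrip test above (issue 236) guards field ordering; in practice the fields of a compound dataset come back in the order they were defined:

    import numpy as np
    import h5pyd as h5py

    dt = np.dtype([("weight", np.float64),
                   ("cputime", np.float64),
                   ("status", np.uint8)])

    with h5py.File("/home/myuser/scratch/compound.h5", "w") as f:
        data = np.zeros((16,), dtype=dt)
        data["weight"] = np.random.rand(16)
        dset = f.create_dataset("events", data=data)
        assert dset.dtype.names == ("weight", "cputime", "status")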
""" - shape = (100,1) + shape = (100, 1) dset = self.f.create_dataset("test", shape, dtype=float, data=np.random.rand(*shape)) @@ -1907,6 +1909,7 @@ def test_basetype_commutative(self,): assert (val == dset) == (dset == val) assert (val != dset) == (dset != val) + class TestMultiManager(BaseDataset): def test_multi_read_scalar_dataspaces(self): """ @@ -2046,7 +2049,7 @@ def test_multi_read_mixed_types(self): dtype=dts[i], data=(data_in + i)) else: dset = self.f.create_dataset("data" + str(i), shape, - dtype=dts[i], data=data_in_fixed_str) + dtype=dts[i], data=data_in_fixed_str) datasets.append(dset) @@ -2090,7 +2093,7 @@ def test_multi_read_vlen_str(self): self.assertEqual(out[i].dtype, dt) out[i] = np.reshape(out[i], newshape=np.prod(shape)) out[i] = np.reshape(np.array([s.decode() for s in out[i]], dtype=dt), - newshape=shape) + newshape=shape) np.testing.assert_array_equal(out[i], data_in) def test_multi_read_mixed_shapes(self): @@ -2107,7 +2110,7 @@ def test_multi_read_mixed_shapes(self): for i in range(count): dset = self.f.create_dataset("data" + str(i), shape=shapes[i], - dtype=dt, data=data_in[i]) + dtype=dt, data=data_in[i]) datasets.append(dset) mm = MultiManager(datasets=datasets) @@ -2119,33 +2122,33 @@ def test_multi_read_mixed_shapes(self): np.testing.assert_array_equal(out[i], data_in[i][sel_idx]) def test_multi_write_scalar_dataspaces(self): - """ - Test writing to multiple scalar datasets - """ - shape = () - count = 3 - dt = np.int32 - - # Create datasets - zeros = np.zeros(shape, dtype=dt) - data_in = [] - datasets = [] - - for i in range(count): - dset = self.f.create_dataset("data" + str(i), shape, - dtype=dt, data=zeros) - datasets.append(dset) + """ + Test writing to multiple scalar datasets + """ + shape = () + count = 3 + dt = np.int32 - data_in.append(np.array([i])) + # Create datasets + zeros = np.zeros(shape, dtype=dt) + data_in = [] + datasets = [] - mm = MultiManager(datasets) - # Perform write - mm[...] = data_in + for i in range(count): + dset = self.f.create_dataset("data" + str(i), shape, + dtype=dt, data=zeros) + datasets.append(dset) - # Read back and check - for i in range(count): - data_out = self.f["data" + str(i)][...] - np.testing.assert_array_equal(data_out, data_in[i]) + data_in.append(np.array([i])) + + mm = MultiManager(datasets) + # Perform write + mm[...] = data_in + + # Read back and check + for i in range(count): + data_out = self.f["data" + str(i)][...] 
+ np.testing.assert_array_equal(data_out, data_in[i]) def test_multi_write_non_scalar_dataspaces(self): """ @@ -2244,7 +2247,7 @@ def test_multi_write_vlen_str(self): datasets = [] for i in range(count): - dset = self.f.create_dataset("data" + str(i), shape=shape, + dset = self.f.create_dataset("data" + str(i), shape=shape, data=data_initial_vlen, dtype=dt) datasets.append(dset) @@ -2259,7 +2262,7 @@ def test_multi_write_vlen_str(self): out = np.reshape(out, newshape=np.prod(shape)) out = np.reshape(np.array([s.decode() for s in out], dtype=dt), - newshape=shape) + newshape=shape) np.testing.assert_array_equal(out, data_in_vlen) def test_multi_write_mixed_shapes(self): @@ -2275,7 +2278,7 @@ def test_multi_write_mixed_shapes(self): for i in range(count): dset = self.f.create_dataset("data" + str(i), shape=shapes[i], - dtype=dt, data=np.zeros(shapes[i], dtype=dt)) + dtype=dt, data=np.zeros(shapes[i], dtype=dt)) datasets.append(dset) mm = MultiManager(datasets=datasets) @@ -2299,7 +2302,7 @@ def test_multi_write_field_selection(self): for i in range(count): dset = self.f.create_dataset("data" + str(i), shape=shape, - data=np.zeros(shape, dtype=dt), + data=np.zeros(shape, dtype=dt), dtype=dt) datasets.append(dset) diff --git a/test/hl/test_dataset_compound.py b/test/hl/test_dataset_compound.py index 9ebf48fe..86569464 100644 --- a/test/hl/test_dataset_compound.py +++ b/test/hl/test_dataset_compound.py @@ -30,8 +30,7 @@ def test_create_compound_dset(self): print("filename:", filename) f = h5py.File(filename, "w") - #curl -v --header "Host: create_compound_dset.h5pyd_test.hdfgroup.org" http://127.0.0.1:5000 - + # curl -v --header "Host: create_compound_dset.h5pyd_test.hdfgroup.org" http://127.0.0.1:5000 count = 10 @@ -40,7 +39,7 @@ def test_create_compound_dset(self): elem = dset[0] for i in range(count): - theta = (4.0 * math.pi)*(float(i)/float(count)) + theta = (4.0 * math.pi) * (float(i) / float(count)) elem['real'] = math.cos(theta) elem['img'] = math.sin(theta) dset[i] = elem @@ -53,7 +52,7 @@ def test_onefield_compound_dset(self): filename = self.getFileName("test_onefield_compound_dset") print("filename:", filename) f = h5py.File(filename, "w") - + count = 10 dt = np.dtype([('a_field', int),]) @@ -61,7 +60,7 @@ def test_onefield_compound_dset(self): elem = dset[0] for i in range(count): - elem['a_field'] = i*2 + elem['a_field'] = i * 2 dset[i] = elem val = dset[5] @@ -70,6 +69,7 @@ def test_onefield_compound_dset(self): self.assertEqual(dset.dtype.kind, "V") f.close() + if __name__ == '__main__': loglevel = logging.ERROR logging.basicConfig(format='%(asctime)s %(message)s', level=loglevel) diff --git a/test/hl/test_dataset_create.py b/test/hl/test_dataset_create.py index c3007b03..287015c0 100644 --- a/test/hl/test_dataset_create.py +++ b/test/hl/test_dataset_create.py @@ -25,7 +25,6 @@ class TestCreateDataset(TestCase): - def test_create_simple_dset(self): filename = self.getFileName("create_simple_dset") print("filename:", filename) @@ -45,7 +44,7 @@ def test_create_simple_dset(self): self.assertEqual(len(dset.maxshape), 2) self.assertEqual(dset.maxshape[0], 40) self.assertEqual(dset.maxshape[1], 80) - self.assertEqual(dset[0,0], 0) + self.assertEqual(dset[0, 0], 0) dset_ref = f['/simple_dset'] self.assertTrue(dset_ref is not None) @@ -88,23 +87,22 @@ def test_create_float16_dset(self): self.assertEqual(len(dset.maxshape), 2) self.assertEqual(dset.maxshape[0], nrows) self.assertEqual(dset.maxshape[1], ncols) - self.assertEqual(dset[0,0], 0) + self.assertEqual(dset[0, 0], 0) - arr = 
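The MultiManager tests above read and write several datasets through a single indexing expression; a minimal sketch, assuming MultiManager is importable from the package root as the tests do (illustrative domain path):

    import numpy as np
    from h5pyd import File, MultiManager

    with File("/home/myuser/scratch/multi.h5", "w") as f:
        dsets = [f.create_dataset(f"data{i}", (10,), dtype="i4")
                 for i in range(3)]
        mm = MultiManager(datasets=dsets)
        # One write fans out to all three datasets...
        mm[...] = [np.full(10, i, dtype="i4") for i in range(3)]
        # ...and one read returns a list of arrays, one per dataset.
        out = mm[0:5]
        for i in range(3):
            np.testing.assert_array_equal(out[i], np.full(5, i, dtype="i4"))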
np.zeros((nrows,ncols), dtype="f2") + arr = np.zeros((nrows, ncols), dtype="f2") for i in range(nrows): for j in range(ncols): - val = float(i) * 10.0 + float(j)/10.0 - arr[i,j] = val + val = float(i) * 10.0 + float(j) / 10.0 + arr[i, j] = val # write entire array to dataset dset[...] = arr arr = dset[...] # read back - val = arr[2,4] # test one value + val = arr[2, 4] # test one value self.assertTrue(val > 20.4 - 0.01) self.assertTrue(val < 20.4 + 0.01) - f.close() def test_fillvalue_simple_dset(self): @@ -151,13 +149,11 @@ def test_fillvalue_char_dset(self): self.assertEqual(dset.shape[1], 3) self.assertEqual(str(dset.dtype), '|S1') self.assertEqual(dset.fillvalue, b'X') - self.assertEqual(dset[0,0], b'a') - self.assertEqual(dset[5,2], b'z') - + self.assertEqual(dset[0, 0], b'a') + self.assertEqual(dset[5, 2], b'z') f.close() - def test_simple_1d_dset(self): filename = self.getFileName("simple_1d_dset") print("filename:", filename) @@ -188,7 +184,6 @@ def test_simple_1d_dset(self): dset[0:5] = [2,] * 5 vals = dset[:] - f.close() def test_fixed_len_str_dset(self): @@ -258,7 +253,7 @@ def test_create_dset_gzip(self): arr = np.random.rand(dims[0], dims[1]) dset = f.create_dataset('simple_dset_gzip', data=arr, dtype='f8', - compression='gzip', compression_opts=9) + compression='gzip', compression_opts=9) self.assertEqual(dset.name, "/simple_dset_gzip") self.assertTrue(isinstance(dset.shape, tuple)) @@ -281,7 +276,7 @@ def test_create_dset_gzip(self): else: self.assertEqual(chunks[0], 20) self.assertEqual(chunks[1], 40) - + self.assertEqual(dset.compression, 'gzip') self.assertEqual(dset.compression_opts, 9) self.assertFalse(dset.shuffle) @@ -303,7 +298,7 @@ def test_create_dset_lz4(self): f = h5py.File(filename, "w") if config.get("use_h5py"): - return # lz4 not supported with h5py + return # lz4 not supported with h5py if "lz4" not in f.compressors: print("lz4 not supproted") @@ -315,7 +310,7 @@ def test_create_dset_lz4(self): arr = np.random.rand(dims[0], dims[1]) dset = f.create_dataset('simple_dset_lz4', data=arr, dtype='i4', - compression='lz4', compression_opts=5) + compression='lz4', compression_opts=5) self.assertEqual(dset.name, "/simple_dset_lz4") self.assertTrue(isinstance(dset.shape, tuple)) @@ -362,9 +357,9 @@ def test_create_dset_gzip_and_shuffle(self): # create some test data arr = np.random.rand(dims[0], dims[1]) - kwds = {"chunks": (4,8)} + kwds = {"chunks": (4, 8)} dset = f.create_dataset('simple_dset_gzip_shuffle', data=arr, dtype='f8', - compression='gzip', shuffle=True, compression_opts=9, **kwds) + compression='gzip', shuffle=True, compression_opts=9, **kwds) self.assertEqual(dset.name, "/simple_dset_gzip_shuffle") self.assertTrue(isinstance(dset.shape, tuple)) @@ -380,8 +375,8 @@ def test_create_dset_gzip_and_shuffle(self): chunks = dset.chunks # chunk layout auto-generated self.assertTrue(isinstance(chunks, tuple)) self.assertEqual(len(chunks), 2) - #self.assertEqual(dset.compression, 'gzip') - #self.assertEqual(dset.compression_opts, 9) + # self.assertEqual(dset.compression, 'gzip') + # self.assertEqual(dset.compression_opts, 9) self.assertTrue(dset.shuffle) dset_ref = f['/simple_dset_gzip_shuffle'] @@ -414,13 +409,12 @@ def test_bool_dset(self): self.assertEqual(dset[0], False) - vals = dset[:] # read back for i in range(10): self.assertEqual(vals[i], False) # Write True's to the first five elements - dset[0:5] = [True,]*5 + dset[0:5] = [True,] * 5 dset = None dset = f["/bool_dset"] @@ -428,7 +422,7 @@ def test_bool_dset(self): # read back vals = dset[...] 
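The fillvalue tests above rely on unwritten elements reading back as the fill value; for example (illustrative domain path):

    import numpy as np
    import h5pyd as h5py

    with h5py.File("/home/myuser/scratch/fill.h5", "w") as f:
        dset = f.create_dataset("filled", (10,), dtype="i4", fillvalue=42)
        dset[0:5] = np.arange(5)
        print(dset.fillvalue)   # 42
        print(dset[...])        # [ 0  1  2  3  4 42 42 42 42 42]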
for i in range(5): - if i<5: + if i < 5: self.assertEqual(vals[i], True) else: self.assertEqual(vals[i], False) @@ -456,7 +450,7 @@ def test_require_dset(self): self.assertEqual(len(dset.maxshape), 2) self.assertEqual(dset.maxshape[0], 40) self.assertEqual(dset.maxshape[1], 80) - self.assertEqual(dset[0,0], 0) + self.assertEqual(dset[0, 0], 0) self.assertEqual(len(f), 1) @@ -472,7 +466,7 @@ def test_require_dset(self): self.assertEqual(len(f), 1) - try: + try: f.require_dataset('dset', dims, dtype='f4', exact=True) self.assertTrue(False) # exception expected except TypeError: @@ -498,14 +492,14 @@ def check_props(dset): self.assertEqual(len(dset.maxshape), 2) self.assertEqual(dset.maxshape[0], 40) self.assertEqual(dset.maxshape[1], 80) - self.assertEqual(dset[0,0], 0) + self.assertEqual(dset[0, 0], 0) dims = (40, 80) dset = f.create_dataset('simple_dset', dims, dtype='f4') self.assertEqual(dset.name, '/simple_dset') check_props(dset) - + dset_copy = f.create_dataset_like('similar_dset', dset) self.assertEqual(dset_copy.name, '/similar_dset') check_props(dset_copy) @@ -549,7 +543,7 @@ def validate_dset(dset): self.assertEqual(len(dset.maxshape), 2) self.assertEqual(dset.maxshape[0], 40) self.assertEqual(dset.maxshape[1], 80) - self.assertEqual(dset[0,0], 0) + self.assertEqual(dset[0, 0], 0) filename = self.getFileName("create_anon_dset") print("filename:", filename) @@ -570,7 +564,6 @@ def validate_dset(dset): self.assertEqual(dset.num_chunks, 0) self.assertEqual(dset.allocated_size, 0) - f.close() f = h5py.File(filename, "a") # re-open @@ -592,11 +585,10 @@ def validate_dset(dset): print(f"didn't expect to get: {dset}") self.asertTrue(False) except IOError: - pass # expected - f.close() + pass # expected + f.close() - if __name__ == '__main__': loglevel = logging.ERROR logging.basicConfig(format='%(asctime)s %(message)s', level=loglevel) diff --git a/test/hl/test_dataset_extend.py b/test/hl/test_dataset_extend.py index 92745a9a..d3187867 100644 --- a/test/hl/test_dataset_extend.py +++ b/test/hl/test_dataset_extend.py @@ -32,7 +32,7 @@ def test_extend_dset(self): primes = [2, 3, 5, 7, 11, 13, 17, 19] - dset = f.create_dataset('primes', (1,len(primes)), maxshape=(None, len(primes)), dtype='i8') + dset = f.create_dataset('primes', (1, len(primes)), maxshape=(None, len(primes)), dtype='i8') maxshape = dset.maxshape self.assertEqual(maxshape[0], None) @@ -40,10 +40,10 @@ def test_extend_dset(self): shape = dset.shape self.assertEqual(shape[0], 1) self.assertEqual(shape[1], len(primes)) - #print('chunks:', dset.chunks) + # print('chunks:', dset.chunks) # write primes - dset[0:,:] = primes + dset[0:, :] = primes # extend first dimension of dataset dset.resize(2, axis=0) @@ -58,10 +58,10 @@ def test_extend_dset(self): for i in range(len(primes)): primes[i] *= 2 - dset[1:,:] = primes + dset[1:, :] = primes # retrieve an element from updated dataset - self.assertEqual(dset[1,2], 10) + self.assertEqual(dset[1, 2], 10) f.close() diff --git a/test/hl/test_dataset_fancyselect.py b/test/hl/test_dataset_fancyselect.py index c6df5a01..a43e8673 100644 --- a/test/hl/test_dataset_fancyselect.py +++ b/test/hl/test_dataset_fancyselect.py @@ -22,9 +22,6 @@ from common import ut, TestCase -# test fancy selection -# -# class TestFancySelectDataset(TestCase): def test_dset(self): @@ -32,22 +29,22 @@ def test_dset(self): print("filename:", filename) f = h5py.File(filename, "w") - dset2d = f.create_dataset('dset2d', (10,10), dtype='i4') - vals = np.zeros((10,10), dtype='i4') + dset2d = f.create_dataset('dset2d', (10, 
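require_dataset, exercised above, opens the dataset when it already exists and only errors on mismatch; with exact=True a dtype difference raises TypeError instead of returning a castable dataset. Sketch (illustrative domain path):

    import h5pyd as h5py

    with h5py.File("/home/myuser/scratch/require.h5", "w") as f:
        d1 = f.require_dataset("dset", (40, 80), dtype="f8")  # creates
        d2 = f.require_dataset("dset", (40, 80), dtype="f8")  # opens existing
        assert d1.id.id == d2.id.id
        try:
            f.require_dataset("dset", (40, 80), dtype="f4", exact=True)
        except TypeError:
            print("dtype mismatch detected")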
10), dtype='i4') + vals = np.zeros((10, 10), dtype='i4') for i in range(10): for j in range(10): - vals[i,j] = i*10+j + vals[i, j] = i * 10 + j dset2d[...] = vals - coords = [2,5,6,9] + coords = [2, 5, 6, 9] - arr = dset2d[ 5:7, coords ] - self.assertEqual(arr.shape, (2,4)) + arr = dset2d[5:7, coords] + self.assertEqual(arr.shape, (2, 4)) for i in range(2): row = arr[i] for j in range(4): - self.assertEqual(row[j], (i+5)*10+coords[j]) - + self.assertEqual(row[j], (i + 5) * 10 + coords[j]) + f.close() diff --git a/test/hl/test_dataset_getitem.py b/test/hl/test_dataset_getitem.py index 15dad3a0..07ce007b 100644 --- a/test/hl/test_dataset_getitem.py +++ b/test/hl/test_dataset_getitem.py @@ -60,6 +60,7 @@ Update using new NULL dataset constructor once h5py 2.7 is out. """ + class TestEmpty(TestCase): def setUp(self): @@ -67,7 +68,7 @@ def setUp(self): filename = self.getFileName("dataset_testempty") print("filename:", filename) self.f = h5py.File(filename, 'w') - self.dset = self.f.create_dataset('x',dtype='S10') + self.dset = self.f.create_dataset('x', dtype='S10') self.empty_obj = h5py.Empty(np.dtype("S10")) def test_ndim(self): @@ -100,7 +101,7 @@ def test_slice(self): def test_multi_block_slice(self): """ MultiBlockSlice -> ValueError """ """ TBD """ - #with self.assertRaises(ValueError): + # with self.assertRaises(ValueError): # self.dset[h5py.MultiBlockSlice()] def test_index(self): @@ -111,7 +112,7 @@ def test_index(self): def test_indexlist(self): """ index list -> ValueError """ with self.assertRaises(ValueError): - self.dset[[1,2,5]] + self.dset[[1, 2, 5]] def test_mask(self): """ mask -> ValueError """ @@ -123,7 +124,7 @@ def test_fieldnames(self): """ field name -> ValueError """ with self.assertRaises(ValueError): self.dset['field'] - + class TestScalarFloat(TestCase): @@ -175,7 +176,7 @@ def test_index(self): def test_indexlist(self): """ index list -> ValueError """ with self.assertRaises(ValueError): - self.dset[[1,2,5]] + self.dset[[1, 2, 5]] # FIXME: NumPy permits this def test_mask(self): @@ -198,9 +199,9 @@ def setUp(self): print("filename:", filename) self.f = h5py.File(filename, 'w') self.data = np.array((42.5, -118, "Hello"), dtype=[('a', 'f'), ('b', 'i'), ('c', '|S10')]) - #self.dset = self.f.create_dataset('x', data=self.data) + # self.dset = self.f.create_dataset('x', data=self.data) self.dset = self.f.create_dataset('x', (), dtype=[('a', 'f'), ('b', 'i'), ('c', '|S10')]) - self.dset[...] = (42.5, -118, "Hello") + self.dset[...] = (42.5, -118, "Hello") def test_ndim(self): """ Verify number of dimensions """ @@ -246,7 +247,7 @@ def test_index(self): def test_indexlist(self): """ index list -> ValueError """ with self.assertRaises(ValueError): - self.dset[[1,2,5]] + self.dset[[1, 2, 5]] # FIXME: NumPy permits this def test_mask(self): @@ -259,13 +260,12 @@ def test_mask(self): @ut.skip def test_fieldnames(self): """ field name -> bare value """ - #TBD: fix when field access is supported in hsds + # TBD: fix when field access is supported in hsds out = self.dset['a'] self.assertIsInstance(out, np.float32) self.assertEqual(out, self.dset['a']) - class TestScalarArray(TestCase): def setUp(self): @@ -279,7 +279,7 @@ def setUp(self): try: self.dset[...] 
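TestEmpty above covers null-dataspace datasets: created with a dtype but no shape, they report shape None and reject element access, as the sketch below shows (illustrative domain path):

    import h5pyd as h5py

    with h5py.File("/home/myuser/scratch/empty.h5", "w") as f:
        dset = f.create_dataset("x", dtype="S10")  # null dataspace
        print(dset.shape)   # None
        try:
            dset[0]
        except ValueError:
            print("empty dataset: no elements to index")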
= self.data except (IOError, OSError) as oe: - #TBD this is failing on HSDS + # TBD this is failing on HSDS if not self.is_hsds(): raise oe @@ -454,7 +454,7 @@ def test_index_outofrange(self): self.dset[100] def test_indexlist_simple(self): - self.assertNumpyBehavior(self.dset, self.data, np.s_[[1,2,5]]) + self.assertNumpyBehavior(self.dset, self.data, np.s_[[1, 2, 5]]) def test_indexlist_single_index_ellipsis(self): self.assertNumpyBehavior(self.dset, self.data, np.s_[[0], ...]) @@ -475,12 +475,12 @@ def test_indexlist_outofrange(self): def test_indexlist_nonmonotonic(self): """ we require index list values to be strictly increasing """ with self.assertRaises(TypeError): - self.dset[[1,3,2]] + self.dset[[1, 3, 2]] def test_indexlist_repeated(self): """ we forbid repeated index values """ with self.assertRaises(TypeError): - self.dset[[1,1,2]] + self.dset[[1, 1, 2]] def test_mask_true(self): self.assertNumpyBehavior(self.dset, self.data, np.s_[self.data > -100]) @@ -509,7 +509,7 @@ def setUp(self): filename = self.getFileName("dataset_test2dzerofloat") print("filename:", filename) self.f = h5py.File(filename, 'w') - self.data = np.ones((0,3), dtype='f') + self.data = np.ones((0, 3), dtype='f') self.dset = self.f.create_dataset('x', data=self.data) def test_ndim(self): @@ -521,7 +521,8 @@ def test_shape(self): self.assertEqual(self.dset.shape, (0, 3)) def test_indexlist(self): - self.assertNumpyBehavior(self.dset, self.data, np.s_[:,[0,1,2]]) + self.assertNumpyBehavior(self.dset, self.data, np.s_[:, [0, 1, 2]]) + class Test2DFloat(TestCase): @@ -530,9 +531,9 @@ def setUp(self): filename = self.getFileName("dataset_test2dfloat") print("filename:", filename) self.f = h5py.File(filename, 'w') - self.data = np.ones((5,3), dtype='f') + self.data = np.ones((5, 3), dtype='f') self.dset = self.f.create_dataset('x', data=self.data) - + def test_ndim(self): """ Verify number of dimensions """ self.assertEqual(self.dset.ndim, 2) @@ -543,14 +544,14 @@ def test_size(self): def test_nbytes(self): """ Verify nbytes """ - self.assertEqual(self.dset.nbytes, 15*self.data.dtype.itemsize) + self.assertEqual(self.dset.nbytes, 15 * self.data.dtype.itemsize) def test_shape(self): """ Verify shape """ self.assertEqual(self.dset.shape, (5, 3)) def test_indexlist(self): - self.assertNumpyBehavior(self.dset, self.data, np.s_[:,[0,1,2]]) + self.assertNumpyBehavior(self.dset, self.data, np.s_[:, [0, 1, 2]]) @ut.expectedFailure def test_index_emptylist(self): @@ -559,6 +560,7 @@ def test_index_emptylist(self): # with h5py 3.2.1 at least self.assertNumpyBehavior(self.dset, self.data, np.s_[[]]) + class Test3DFloat(TestCase): def setUp(self): @@ -566,11 +568,12 @@ def setUp(self): filename = self.getFileName("dataset_test3dfloat") print("filename:", filename) self.f = h5py.File(filename, 'w') - self.data = np.ones((4,6,8), dtype='f') + self.data = np.ones((4, 6, 8), dtype='f') self.dset = self.f.create_dataset('x', data=self.data, dtype='f') def test_index_simple(self): - self.assertNumpyBehavior(self.dset, self.data, np.s_[1,2:4,3:6]) + self.assertNumpyBehavior(self.dset, self.data, np.s_[1, 2:4, 3:6]) + class TestVeryLargeArray(TestCase): @@ -585,6 +588,7 @@ def setUp(self): def test_size(self): self.assertEqual(self.dset.size, 2**31) + if __name__ == '__main__': loglevel = logging.ERROR logging.basicConfig(format='%(asctime)s %(message)s', level=loglevel) diff --git a/test/hl/test_dataset_initializer.py b/test/hl/test_dataset_initializer.py index faf16fd9..32862105 100644 --- a/test/hl/test_dataset_initializer.py +++ 
b/test/hl/test_dataset_initializer.py @@ -10,7 +10,6 @@ # request a copy from help@hdfgroup.org. # ############################################################################## import logging -import numpy as np import config @@ -21,8 +20,8 @@ from common import ut, TestCase -class TestDatasetInitializer(TestCase): +class TestDatasetInitializer(TestCase): def test_create_arange_dset(self): filename = self.getFileName("create_arange_dset") @@ -36,9 +35,9 @@ def test_create_arange_dset(self): start = 10 step = 2 dims = (extent,) - initializer="arange" + initializer = "arange" initializer_opts = [f"--start={start}", f"--step={step}"] - kwargs = {"dtype": "i8", "initializer":initializer, "initializer_opts": initializer_opts} + kwargs = {"dtype": "i8", "initializer": initializer, "initializer_opts": initializer_opts} dset = f.create_dataset('arange_dset', dims, **kwargs) self.assertEqual(dset.name, "/arange_dset") @@ -56,6 +55,7 @@ def test_create_arange_dset(self): f.close() + if __name__ == '__main__': loglevel = logging.ERROR logging.basicConfig(format='%(asctime)s %(message)s', level=loglevel) diff --git a/test/hl/test_dataset_objref.py b/test/hl/test_dataset_objref.py index e059a087..37b9a74d 100644 --- a/test/hl/test_dataset_objref.py +++ b/test/hl/test_dataset_objref.py @@ -21,7 +21,6 @@ class TestObjRef(TestCase): - def test_create(self): filename = self.getFileName("objref_test") print(filename) @@ -133,7 +132,6 @@ def test_create(self): self.assertEqual(obj.id.id, d1.id.id) # ref to d1 self.assertEqual(obj.name, "/g2/d1") - def test_delete(self): filename = self.getFileName("objref_delete_test") print(filename) @@ -142,7 +140,7 @@ def test_delete(self): self.assertTrue('/' in f) # create a dataset - dset = f.create_dataset('dset', data=[1,2,3]) + dset = f.create_dataset('dset', data=[1, 2, 3]) dset_ref = dset.ref f.attrs["dset_ref"] = dset_ref @@ -150,7 +148,7 @@ def test_delete(self): try: f[dset_ref] except ValueError: - pass # expected + pass # expected f.close() diff --git a/test/hl/test_dataset_pointselect.py b/test/hl/test_dataset_pointselect.py index 0ab1909f..d7a0574e 100644 --- a/test/hl/test_dataset_pointselect.py +++ b/test/hl/test_dataset_pointselect.py @@ -11,8 +11,6 @@ ############################################################################## import numpy as np -import math -import logging import config @@ -23,9 +21,6 @@ from common import ut, TestCase -# test point selection -# -# class TestPointSelectDataset(TestCase): def test_boolean_select(self): @@ -33,12 +28,12 @@ def test_boolean_select(self): print("filename:", filename) f = h5py.File(filename, "w") - data = np.zeros((10,10), dtype='i4') + data = np.zeros((10, 10), dtype='i4') for i in range(10): for j in range(10): - data[i,j] = i - j + data[i, j] = i - j dset = f.create_dataset('dset', data=data) - pos_vals = dset[ data > 0 ] + pos_vals = dset[data > 0] self.assertEqual(len(pos_vals), 45) for value in pos_vals: self.assertTrue(value > 0) @@ -55,8 +50,8 @@ def test_1d_pointselect(self): vals.reverse() dset1d[...] = vals vals = dset1d[...] 
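
A minimal sketch of the two selection styles these point-selection tests exercise, assuming a running HSDS endpoint; the domain path is illustrative, and under use_h5py the same calls work against a local file:

    import numpy as np
    import h5pyd as h5py

    # hypothetical writable domain
    with h5py.File("/home/test_user1/h5pyd_tests/select_demo.h5", "w") as f:
        dset = f.create_dataset("dset1d", data=np.arange(10, dtype="i4"))
        # boolean-mask selection returns only the matching elements
        evens = dset[np.arange(10) % 2 == 0]
        # coordinate-list selection: indices must be strictly increasing and
        # unique (h5pyd raises TypeError otherwise, per the getitem tests above)
        pts = dset[[2, 4, 6, 8]]
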
- pts = dset1d[ [2,4,6,8] ] - expected_vals = [7,5,3,1] + pts = dset1d[[2, 4, 6, 8]] + expected_vals = [7, 5, 3, 1] for i in range(len(expected_vals)): self.assertEqual(pts[i], expected_vals[i]) @@ -67,34 +62,33 @@ def test_2d_pointselect(self): print("filename:", filename) f = h5py.File(filename, "w") - dset2d = f.create_dataset('dset2d', (10,20), dtype='i4') - vals = np.zeros((10,20), dtype='i4') + dset2d = f.create_dataset('dset2d', (10, 20), dtype='i4') + vals = np.zeros((10, 20), dtype='i4') for i in range(10): for j in range(20): - vals[i,j] = i*1000 + j + vals[i, j] = i * 1000 + j dset2d[...] = vals vals = dset2d[...] # TBD: selection with a list for one axis is not working in HSDS if config.get("use_h5py"): - pts = dset2d[ 5, (5,10,15) ] + pts = dset2d[5, (5, 10, 15)] else: # But this type of selection not working for h5py # cf: https://github.com/h5py/h5py/issues/966 - pts = dset2d[ [ (5,5), (5,10), (5,15) ] ] + pts = dset2d[[(5, 5), (5, 10), (5, 15)]] - expected_vals = [5005,5010,5015] + expected_vals = [5005, 5010, 5015] for i in range(len(expected_vals)): - self.assertEqual(pts[i],expected_vals[i]) + self.assertEqual(pts[i], expected_vals[i]) - pts = dset2d[[1,2]] + pts = dset2d[[1, 2]] if config.get("use_h5py"): # TBD: fix for h5pyd - self.assertEqual(pts.shape, (2,20)) + self.assertEqual(pts.shape, (2, 20)) for i in range(20): - self.assertEqual(pts[0,i], vals[1,i]) - self.assertEqual(pts[1,i], vals[2,i]) - + self.assertEqual(pts[0, i], vals[1, i]) + self.assertEqual(pts[1, i], vals[2, i]) f.close() @@ -103,29 +97,28 @@ def test_2d_pointselect_broadcast(self): print("filename:", filename) f = h5py.File(filename, "w") - dset2d = f.create_dataset('dset2d', (10,20), dtype='i4') - vals = np.zeros((10,20), dtype='i4') + dset2d = f.create_dataset('dset2d', (10, 20), dtype='i4') + vals = np.zeros((10, 20), dtype='i4') for i in range(10): for j in range(20): - vals[i,j] = i*1000 + j + vals[i, j] = i * 1000 + j dset2d[...] 
= vals if config.get("use_h5py"): # TODO - not working for h5pyd - pts = dset2d[(2,4,7), :] - self.assertEqual(len(pts),3) - row1 = pts[0,:] + pts = dset2d[(2, 4, 7), :] + self.assertEqual(len(pts), 3) + row1 = pts[0, :] self.assertEqual(list(row1), list(range(2000, 2020))) - row2 = pts[1,:] + row2 = pts[1, :] self.assertEqual(list(row2), list(range(4000, 4020))) - row3 = pts[2,:] + row3 = pts[2, :] self.assertEqual(list(row3), list(range(7000, 7020))) - f.close() if __name__ == '__main__': - #loglevel = logging.DEBUG - #logging.basicConfig(format='%(asctime)s %(message)s', level=loglevel) + # loglevel = logging.DEBUG + # logging.basicConfig(format='%(asctime)s %(message)s', level=loglevel) ut.main() diff --git a/test/hl/test_dataset_scalar.py b/test/hl/test_dataset_scalar.py index e94e116e..33d52236 100644 --- a/test/hl/test_dataset_scalar.py +++ b/test/hl/test_dataset_scalar.py @@ -23,12 +23,13 @@ from common import ut, TestCase from datetime import datetime + class TestScalarDataset(TestCase): def test_scalar_dset(self): filename = self.getFileName("scalar_dset") print("filename:", filename) f = h5py.File(filename, "w") - + dset = f.create_dataset('scalar', data=42, dtype='i8') val = dset[()] @@ -56,14 +57,13 @@ def test_scalar_dset(self): self.assertTrue(isinstance(dset.modified, datetime)) self.assertEqual(dset.file.filename, filename) - + # try creating dataset implicitly g1 = f.create_group("g1") g1["scalar"] = 42 dset = g1["scalar"] val = dset[()] self.assertEqual(val, 42) - f.close() @@ -97,7 +97,7 @@ def test_scalar_str_dset(self): # try setting value using tuple dset[()] = str3 val = dset[()] - + self.assertEqual(val, str3.encode("utf-8")) # try creating dataset implicitly diff --git a/test/hl/test_dataset_setitem.py b/test/hl/test_dataset_setitem.py index 79e9dae8..b1f71617 100644 --- a/test/hl/test_dataset_setitem.py +++ b/test/hl/test_dataset_setitem.py @@ -39,10 +39,10 @@ def test_broadcast(self): filename = self.getFileName("dset_broadcast") print("filename:", filename) f = h5py.File(filename, "w") - dset = f.create_dataset("dset", (4,5), dtype=np.int32) + dset = f.create_dataset("dset", (4, 5), dtype=np.int32) dset[...] 
= 42 for i in range(4): - self.assertEqual(dset[i,i], 42) + self.assertEqual(dset[i, i], 42) f.close() def test_type_conversion(self): @@ -56,7 +56,6 @@ def test_type_conversion(self): f.close() - if __name__ == '__main__': loglevel = logging.ERROR logging.basicConfig(format='%(asctime)s %(message)s', level=loglevel) diff --git a/test/hl/test_dimscale.py b/test/hl/test_dimscale.py index 650f46da..8e0a9a91 100644 --- a/test/hl/test_dimscale.py +++ b/test/hl/test_dimscale.py @@ -43,7 +43,7 @@ def test_everything(self): self.assertEqual(len(dset.dims), len(dset.shape)) for d in dset.dims: self.assertIsInstance(d, h5py._hl.dims.DimensionProxy) - + # Create and name dimension scales dset.dims.create_scale(f['scale_x'], 'Simulation X (North) axis') self.assertTrue(h5py.h5ds.is_scale(f['scale_x'].id)) diff --git a/test/hl/test_file.py b/test/hl/test_file.py index 97b0a9d0..ef2c6df3 100644 --- a/test/hl/test_file.py +++ b/test/hl/test_file.py @@ -19,20 +19,20 @@ import h5pyd as h5py from common import ut, TestCase -from datetime import datetime from copy import copy import time import logging + class TestFile(TestCase): def test_version(self): version = h5py.version.version # should be of form "n.n.n" n = version.find(".") - self.assertTrue(n>=1) - m = version[(n+1):].find('.') - self.assertTrue(m>=1) + self.assertTrue(n >= 1) + m = version[(n + 1):].find('.') + self.assertTrue(m >= 1) def test_serverinfo(self): if h5py.__name__ == "h5pyd": @@ -50,7 +50,6 @@ def test_serverinfo(self): self.assertTrue(node_count >= 1) self.assertTrue("isadmin" in info) - def test_create(self): filename = self.getFileName("new_file") print("filename:", filename) @@ -62,7 +61,7 @@ def test_create(self): self.assertEqual(len(f.keys()), 0) self.assertEqual(f.mode, 'r+') self.assertTrue(h5py.is_hdf5(filename)) - + if h5py.__name__ == "h5pyd": self.assertTrue(f.id.http_conn.endpoint.startswith("http")) self.assertTrue(f.id.id is not None) @@ -71,9 +70,9 @@ def test_create(self): self.assertFalse(h5py.is_hdf5(f.id.id)) # Check domain's timestamps if h5py.__name__ == "h5pyd": - #print("modified:", datetime.fromtimestamp(f.modified), f.modified) - #print("created: ", datetime.fromtimestamp(f.created), f.created) - #print("now: ", datetime.fromtimestamp(now), now) + # print("modified:", datetime.fromtimestamp(f.modified), f.modified) + # print("created: ", datetime.fromtimestamp(f.created), f.created) + # print("now: ", datetime.fromtimestamp(now), now) # verify the timestamps make sense # we add a 30-sec margin to account for possible time skew # between client and server @@ -82,12 +81,12 @@ def test_create(self): self.assertTrue(f.modified - 30.0 < now) self.assertTrue(f.modified + 30.0 > now) self.assertTrue(f.modified >= f.created) - + self.assertTrue(len(f.owner) > 0) version = f.serverver - # server version should be of form "n.n.n" + # server version should be of form "n.n.n" n = version.find(".") - self.assertTrue(n>=1) + self.assertTrue(n >= 1) limits = f.limits for k in ('min_chunk_size', 'max_chunk_size', 'max_request_size'): self.assertTrue(k in limits) @@ -108,8 +107,8 @@ def test_create(self): # re-open in exclusive mode (should fail) h5py.File(filename, mode) self.assertTrue(False) - except IOError as ioe: - pass + except IOError: + pass # re-open as read-write f = h5py.File(filename, 'w') @@ -117,7 +116,7 @@ def test_create(self): self.assertEqual(f.mode, 'r+') self.assertEqual(len(f.keys()), 0) root_grp = f['/'] - #f.create_group("subgrp") + # f.create_group("subgrp") root_grp.create_group("subgrp")
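
For reference, a compact sketch of the open-mode behavior test_create walks through; the domain name is illustrative, and IOError is the exception both h5py and h5pyd raise on a failed exclusive create:

    import h5pyd as h5py

    filename = "/home/test_user1/h5pyd_tests/mode_demo.h5"  # hypothetical domain
    f = h5py.File(filename, "w")    # create, truncating any existing domain
    f.close()
    try:
        h5py.File(filename, "w-")   # exclusive create: fails if the domain exists
    except IOError:
        pass
    f = h5py.File(filename, "a")    # append: read-write, created on demand
    f.close()
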
self.assertEqual(len(f.keys()), 1) f.close() @@ -128,11 +127,11 @@ def test_create(self): f.create_group("foo") del f["foo"] f.close() - + # re-open as read-only if h5py.__name__ == "h5pyd": - wait_time = 90 # change to >90 to test async updates - print("waiting {wait_time:d} seconds for root scan sync".format(wait_time = wait_time)) + wait_time = 90  # change to >90 to test async updates + print("waiting {wait_time:d} seconds for root scan sync".format(wait_time=wait_time)) time.sleep(wait_time) # let async process update obj number f = h5py.File(filename, 'r') self.assertEqual(f.filename, filename) @@ -162,13 +161,13 @@ def test_create(self): self.assertEqual(len(f.keys()), 1) - if h5py.__name__ == "h5pyd": + if h5py.__name__ == "h5pyd": # check properties that are only available for h5pyd # Note: num_groups won't reflect current state since the # data is being updated asynchronously self.assertEqual(f.num_objects, 3) self.assertEqual(f.num_groups, 3) - + self.assertEqual(f.num_datasets, 0) self.assertEqual(f.num_datatypes, 0) self.assertTrue(f.allocated_bytes == 0) @@ -191,7 +190,6 @@ def test_create(self): self.assertTrue('/' in f) f.close() - # open in truncate mode f = h5py.File(filename, 'w') self.assertEqual(f.filename, filename) @@ -207,19 +205,18 @@ def test_create(self): f.close() self.assertEqual(f.id.id, 0) - def test_open_notfound(self): # verify open of non-existent file throws exception try: filename = self.getFileName("no_file_here") print("filename:", filename) - f = h5py.File(filename, 'r') - self.assertTrue(False) #expect exception + with h5py.File(filename, 'r') as f: + self.assertNotEqual(f, None) + self.assertTrue(False) # expect exception except IOError: pass - def test_auth(self): if h5py.__name__ == "h5py": return # ACLs are just for h5pyd @@ -235,19 +232,18 @@ def test_auth(self): self.assertEqual(f.filename, filename) self.assertEqual(f.name, "/") self.assertTrue(f.id.id is not None) - root_id = f.id.id self.assertEqual(len(f.keys()), 2) # no explicit ACLs yet file_acls = f.getACLs() self.assertTrue(len(file_acls) >= 1) # Should have at least the test_user1 acl - + username = f.owner file_acl = f.getACL(username) # default owner ACL should grant full permissions acl_keys = ("create", "read", "update", "delete", "readACL", "updateACL") - #self.assertEqual(file_acl["userName"], "default") + # self.assertEqual(file_acl["userName"], "default") for k in acl_keys: self.assertEqual(file_acl[k], True) @@ -255,7 +251,7 @@ def test_auth(self): default_acl = f.getACL("default") except IOError as ioe: if ioe.errno == 404: - pass # expected + pass # expected # create public-read ACL default_acl = {} @@ -272,7 +268,7 @@ def test_auth(self): try: f = h5py.File(filename, 'r', username=self.test_user2["name"], password=self.test_user2["password"]) f.close() - except IOError as ioe: + except IOError: self.assertTrue(False) # test_user2 has read access, but opening in write mode should fail @@ -303,7 +299,7 @@ def test_auth(self): # open with test_user2 should succeed for read mode try: f = h5py.File(filename, 'r', username=self.test_user2["name"], password=self.test_user2["password"]) - except IOError as ioe: + except IOError: self.assertTrue(False) # test_user2 opening in write mode should still fail @@ -316,7 +312,7 @@ def test_auth(self): # append mode w/ test_user2 try: f = h5py.File(filename, 'a', username=self.test_user2["name"], password=self.test_user2["password"]) - except IOError as ioe: + except IOError: self.assertTrue(False) # shouldn't get here grp = f['/'] @@ -351,7
+347,6 @@ def test_close(self): self.assertFalse(f) - if __name__ == '__main__': loglevel = logging.ERROR logging.basicConfig(format='%(asctime)s %(message)s', level=loglevel) diff --git a/test/hl/test_folder.py b/test/hl/test_folder.py index 9c760ea2..0c541001 100644 --- a/test/hl/test_folder.py +++ b/test/hl/test_folder.py @@ -24,13 +24,11 @@ class TestFolders(TestCase): - def test_list(self): - #loglevel = logging.DEBUG - #logging.basicConfig( format='%(asctime)s %(message)s', level=loglevel) + # loglevel = logging.DEBUG + # logging.basicConfig( format='%(asctime)s %(message)s', level=loglevel) test_domain = self.getFileName("folder_test") - filepath = self.getPathFromDomain(test_domain) print(filepath) # create test file if not present. @@ -42,12 +40,13 @@ def test_list(self): return self.assertTrue(f.id.id is not None) - + f.close() - folder_name = op.dirname(filepath) + '/' + print(f" folder filepath = {filepath}") + folder_name = op.dirname(filepath) + '/' - d = h5py.Folder(folder_name) # get folder object + d = h5py.Folder(folder_name) # get folder object self.assertEqual(d.domain, folder_name) self.assertTrue(d.modified) @@ -78,12 +77,12 @@ def test_list(self): self.assertFalse(test_domain_found) test_domain_found = True item = d[name] - #'owner': 'test_user1', - #'created': 1496729517.2346532, - #'class': 'domain', - #'name': '/org/hdfgroup/h5pyd_test/bool_dset', - #'lastModified': 1496729517.2346532 - #self.assertTrue("created" in item) + # 'owner': 'test_user1', + # 'created': 1496729517.2346532, + # 'class': 'domain', + # 'name': '/org/hdfgroup/h5pyd_test/bool_dset', + # 'lastModified': 1496729517.2346532 + # self.assertTrue("created" in item) self.assertTrue("owner" in item) self.assertTrue("class" in item) self.assertTrue("name" in item) @@ -100,7 +99,7 @@ def test_list(self): d.close() # open in verbose mode - d = h5py.Folder(folder_name, verbose=True) # get folder object + d = h5py.Folder(folder_name, verbose=True) # get folder object self.assertEqual(d.domain, folder_name) self.assertTrue(d.modified) @@ -131,12 +130,12 @@ def test_list(self): self.assertFalse(test_domain_found) test_domain_found = True item = d[name] - #'owner': 'test_user1', - #'created': 1496729517.2346532, - #'class': 'domain', - #'name': '/org/hdfgroup/h5pyd_test/bool_dset', - #'lastModified': 1496729517.2346532 - #self.assertTrue("created" in item) + # 'owner': 'test_user1', + # 'created': 1496729517.2346532, + # 'class': 'domain', + # 'name': '/org/hdfgroup/h5pyd_test/bool_dset', + # 'lastModified': 1496729517.2346532 + # self.assertTrue("created" in item) self.assertTrue("owner" in item) self.assertTrue("class" in item) self.assertTrue("name" in item) @@ -162,7 +161,6 @@ def test_list(self): self.assertEqual(i, count) d.close() - # try opening a domain object as a folder f = h5py.Folder(filepath + '/') count = len(f) @@ -171,7 +169,6 @@ def test_list(self): self.assertTrue(False) # unexpected f.close() - def test_create_folder(self): empty = self.getFileName("empty") empty_path = self.getPathFromDomain(empty) @@ -197,11 +194,9 @@ def test_create_folder(self): self.assertTrue(d.is_folder) d.close() - def test_root_folder(self): test_domain = self.getFileName("folder_test") - filepath = self.getPathFromDomain(test_domain) f = h5py.File(filepath, mode='a') self.assertTrue(f.id.id is not None) @@ -209,7 +204,7 @@ def test_root_folder(self): # Folders not supported for h5py f.close() return - + f.close() path_components = filepath.split('/') @@ -229,6 +224,7 @@ def test_root_folder(self): d.close() 
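
The Folder assertions above reduce to this usage pattern (h5pyd only; the path is illustrative). Each item is a dict of domain metadata carrying the keys the test checks:

    import h5pyd

    folder = h5pyd.Folder("/home/test_user1/h5pyd_tests/")  # trailing slash marks a folder
    print("item count:", len(folder))
    for name in folder:
        item = folder[name]  # dict with "name", "class", "owner", etc.
        print(name, item["class"], item["owner"])
    folder.close()
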
self.assertTrue(found) + if __name__ == '__main__': loglevel = logging.ERROR logging.basicConfig(format='%(asctime)s %(message)s', level=loglevel) diff --git a/test/hl/test_group.py b/test/hl/test_group.py index 82b0764b..ba548b59 100644 --- a/test/hl/test_group.py +++ b/test/hl/test_group.py @@ -20,6 +20,7 @@ from datetime import datetime import os.path + class TestGroup(TestCase): def test_create(self): @@ -78,7 +79,7 @@ def test_create(self): r.create_group('g1') self.assertTrue(False) except ValueError: - pass # expected + pass # expected r.create_group('g3') self.assertEqual(len(r), 3) @@ -99,7 +100,7 @@ def test_create(self): r['g1.1'] = g1_1 self.assertTrue(False) # shouldn't get here except RuntimeError: - pass # expected + pass # expected del r['tmp'] self.assertEqual(len(r), 4) @@ -139,7 +140,7 @@ def test_create(self): self.assertEqual(external_link.path, 'somepath') external_link_filename = external_link.filename self.assertTrue(external_link_filename.find('link_target') > -1) - + links = r.items() got_external_link = False for link in links: @@ -185,7 +186,7 @@ def test_create(self): self.assertTrue(name in f) self.assertTrue("/g1/g1.1" in f) g1_1 = f["/g1/g1.1"] - + if is_hsds: linkee_class = r.get('mysoftlink', getclass=True) # TBD: investigate why h5py returned None here @@ -198,16 +199,10 @@ def test_create(self): self.assertEqual(linked_obj.id, g1_1.id) f.close() - - - def test_nested_create(self): filename = self.getFileName("create_nested_group") print("filename:", filename) f = h5py.File(filename, 'w') - is_hsds = False - if isinstance(f.id.id, str) and f.id.id.startswith("g-"): - is_hsds = True # HSDS has different permission defaults self.assertTrue('/' in f) r = f['/'] self.assertEqual(len(r), 0) @@ -226,7 +221,6 @@ def test_nested_create(self): f.close() - def test_external_links(self): # create a file for use as a link target if config.get("use_h5py"): @@ -239,11 +233,8 @@ def test_external_links(self): else: rel_filepath = "linked_file.h5" f = h5py.File(linked_filename, 'w') - is_hsds = False - if isinstance(f.id.id, str) and f.id.id.startswith("g-"): - is_hsds = True g1 = f.create_group("g1") - dset = g1.create_dataset('ds', (5,7), dtype='f4') + dset = g1.create_dataset('ds', (5, 7), dtype='f4') dset_id = dset.id.id f.close() @@ -258,14 +249,14 @@ def test_external_links(self): linked_obj = f["missing_link"] self.assertTrue(False) except KeyError: - pass # expected + pass # expected linked_obj = f["abspath_link"] self.assertTrue(linked_obj.name, "/g1/ds") self.assertEqual(linked_obj.shape, (5, 7)) # The following no longer works for h5py 2.8 # self.assertEqual(linked_obj.id.id, dset_id) - + linked_obj = f["relpath_link"] self.assertTrue(linked_obj.name, "/g1/ds") self.assertEqual(linked_obj.shape, (5, 7)) @@ -290,7 +281,9 @@ def get_count(grp): f = h5py.File(filename, 'w') g1 = f.create_group("g1") - dset = g1.create_dataset('ds', (5,7), dtype='f4') + dset = g1.create_dataset('ds', (5, 7), dtype='f4') + + self.assertNotEqual(dset, None) self.assertEqual(len(g1), 1) self.assertEqual(get_count(g1), 1) @@ -305,12 +298,9 @@ def get_count(grp): self.assertEqual(len(g1_clone), 0) self.assertEqual(get_count(g1_clone), 0) - f.close() - - if __name__ == '__main__': loglevel = logging.ERROR logging.basicConfig(format='%(asctime)s %(message)s', level=loglevel) diff --git a/test/hl/test_table.py b/test/hl/test_table.py index 5c65b766..818e6257 100644 --- a/test/hl/test_table.py +++ b/test/hl/test_table.py @@ -29,7 +29,7 @@ def test_create_table(self): filename =
self.getFileName("create_table_dset") print("filename:", filename) if config.get("use_h5py"): - return # Table not supported with h5py + return # Table not supported with h5py f = h5py.File(filename, "w") count = 10 @@ -39,7 +39,7 @@ def test_create_table(self): elem = table[0] for i in range(count): - theta = (4.0 * math.pi)*(float(i)/float(count)) + theta = (4.0 * math.pi) * (float(i) / float(count)) elem['real'] = math.cos(theta) elem['img'] = math.sin(theta) table[i] = elem @@ -64,14 +64,13 @@ def test_create_table(self): arr = table.read(start=5, stop=6) self.assertEqual(arr.shape, (1,)) - f.close() def test_query_table(self): filename = self.getFileName("query_compound_dset") print("filename:", filename) if config.get("use_h5py"): - return # Table not supported with h5py + return # Table not supported with h5py f = h5py.File(filename, "w") # write entire array @@ -100,7 +99,7 @@ def test_query_table(self): for indx in range(len(data)): row = table[indx] item = data[indx] - for col in range(2,3): + for col in range(2, 3): # first two columns will come back as bytes, not strs self.assertEqual(row[col], item[col]) @@ -108,7 +107,7 @@ def test_query_table(self): indx = 0 for row in cursor: item = data[indx] - for col in range(2,3): + for col in range(2, 3): # first two columns will come back as bytes, not strs self.assertEqual(row[col], item[col]) indx += 1 @@ -118,7 +117,7 @@ def test_query_table(self): indx = 2 for row in cursor: item = data[indx] - for col in range(2,3): + for col in range(2, 3): # first two columns will come back as bytes, not strs self.assertEqual(row[col], item[col]) indx += 1 @@ -127,7 +126,7 @@ def test_query_table(self): condition = "symbol == b'AAPL'" quotes = table.read_where(condition) self.assertEqual(len(quotes), 4) - expected_indices = [1,4,7,10] + expected_indices = [1, 4, 7, 10] for i in range(4): quote = quotes[i] self.assertEqual(len(quote), 5) @@ -145,9 +144,9 @@ def test_query_table(self): self.assertEqual(len(row), 5) num_rows += 1 self.assertEqual(num_rows, 4) - + # try a compound query - condition = "(open > 3000) & (open < 3100)" + condition = "(open > 3000) & (open < 3100)" quotes = table.read_where(condition) self.assertEqual(len(quotes), 5) @@ -155,13 +154,13 @@ def test_query_table(self): quote = quotes[i] self.assertTrue(quote[3] > 3000) self.assertTrue(quote[3] < 3100) - + # try modifying specific rows condition = "symbol == b'AAPL'" update_val = {"open": 123} indices = table.update_where(condition, update_val) self.assertEqual(len(indices), 4) - self.assertEqual(list(indices), [1,4,7,10]) + self.assertEqual(list(indices), [1, 4, 7, 10]) row = tuple(table[4]) self.assertEqual(row, (b'AAPL', b'20170103', 123, 3034)) @@ -173,6 +172,7 @@ def test_query_table(self): self.assertEqual(list(indices), [1]) f.close() + if __name__ == '__main__': loglevel = logging.ERROR logging.basicConfig(format='%(asctime)s %(message)s', level=loglevel) diff --git a/test/hl/test_visit.py b/test/hl/test_visit.py index 40a0c37a..cc715e91 100644 --- a/test/hl/test_visit.py +++ b/test/hl/test_visit.py @@ -17,6 +17,7 @@ import h5pyd as h5py from common import ut, TestCase + class TestVisit(TestCase): def test_visit(self): visit_names = [] @@ -44,7 +45,6 @@ def find_g1_1(name): f["/g1/soft"] = h5py.SoftLink('/g2') f.close() - # re-open as read-only f = h5py.File(filename, 'r') f.visit(visit_item) @@ -107,7 +107,6 @@ def visit_multilink(name, obj): self.assertEqual(len(visited_ids), len(obj_ids)) - if __name__ == '__main__': loglevel = logging.ERROR 
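
A rough sketch of the Table query API the test_table hunks above cover (an h5pyd extension; the tests return early under use_h5py). The domain and table name are illustrative, table creation is elided, and the handle is assumed to come back with h5pyd's Table interface:

    import h5pyd

    f = h5pyd.File("/home/test_user1/h5pyd_tests/table_demo.h5", "a")
    table = f["quotes"]  # assumes an existing compound-type table
    aapl = table.read_where("symbol == b'AAPL'")  # rows matching a NumPy-style condition
    indices = table.update_where("symbol == b'AAPL'", {"open": 123})  # returns updated row indices
    f.close()
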
logging.basicConfig(format='%(asctime)s %(message)s', level=loglevel) diff --git a/test/hl/test_vlentype.py b/test/hl/test_vlentype.py index 5edde3ba..a1c0da43 100644 --- a/test/hl/test_vlentype.py +++ b/test/hl/test_vlentype.py @@ -24,17 +24,15 @@ class TestVlenTypes(TestCase): - def test_create_vlen_attr(self): filename = self.getFileName("create_vlen_attribute") print("filename:", filename) if config.get("use_h5py"): # TBD - skipping as this core dumps in travis for some reason - #return + # return return - - f = h5py.File(filename, 'w') + f = h5py.File(filename, 'w') g1 = f.create_group('g1') g1_1 = g1.create_group('g1_1') @@ -46,8 +44,8 @@ def test_create_vlen_attr(self): # create an attribute that is a VLEN int32 dtvlen = h5py.special_dtype(vlen=np.dtype('int32')) - e0 = np.array([0,1,2]) - e1 = np.array([0,1,2,3]) + e0 = np.array([0, 1, 2]) + e1 = np.array([0, 1, 2, 3]) data = np.array([e0, e1], dtype=object) g1.attrs.create("a1", data, shape=(2,), dtype=dtvlen) @@ -58,22 +56,22 @@ def test_create_vlen_attr(self): self.assertTrue(isinstance(ret_val[0], np.ndarray)) # py36 attribute[a1]: [array([0, 1, 2], dtype=int32) array([0, 1, 2, 3], dtype=int32)] # py27 [(0, 1, 2) (0, 1, 2, 3)] - self.assertEqual(list(ret_val[0]), [0,1,2]) + self.assertEqual(list(ret_val[0]), [0, 1, 2]) self.assertEqual(ret_val[0].dtype, np.dtype('int32')) self.assertTrue(isinstance(ret_val[1], np.ndarray)) self.assertEqual(ret_val[1].dtype, np.dtype('int32')) - self.assertEqual(list(ret_val[1]), [0,1,2,3]) + self.assertEqual(list(ret_val[1]), [0, 1, 2, 3]) # create an attribute that is VLEN ObjRef dtref = h5py.special_dtype(ref=h5py.Reference) dtvlen = h5py.special_dtype(vlen=dtref) e0 = np.array((g1_1.ref,), dtype=dtref) - e1 = np.array((g1_1.ref,g1_2.ref), dtype=dtref) - e2 = np.array((g1_1.ref,g1_2.ref,g1_3.ref), dtype=dtref) - data = [e0,e1,e2] + e1 = np.array((g1_1.ref, g1_2.ref), dtype=dtref) + e2 = np.array((g1_1.ref, g1_2.ref, g1_3.ref), dtype=dtref) + data = [e0, e1, e2] - g1.attrs.create("b1", data, shape=(3,),dtype=dtvlen) + g1.attrs.create("b1", data, shape=(3,), dtype=dtvlen) vlen_val = g1.attrs["b1"] # read back attribute self.assertTrue(isinstance(vlen_val, np.ndarray)) @@ -82,11 +80,11 @@ def test_create_vlen_attr(self): e = vlen_val[i] self.assertTrue(isinstance(e, np.ndarray)) ref_type = h5py.check_dtype(ref=e.dtype) - + self.assertEqual(ref_type, h5py.Reference) # TBD - h5pyd is returning shape of () rather than (1,) for singletons - if i>0: - self.assertEqual(e.shape, ((i+1),)) + if i > 0: + self.assertEqual(e.shape, ((i + 1),)) # first element is always a ref to g1 refd_group = f[e[0]] self.assertEqual(refd_group.attrs['name'], 'g1_1') @@ -109,15 +107,14 @@ def test_create_vlen_attr(self): self.assertTrue(isinstance(item, np.void)) self.assertEqual(len(item), 2) e = item[0] - self.assertEqual(len(e), i+2) + self.assertEqual(len(e), i + 2) refd_group = f[e[0]] self.assertEqual(refd_group.attrs['name'], 'g1_1') - self.assertEqual(item[1], i+1) + self.assertEqual(item[1], i + 1) # close file f.close() - def test_create_vlen_dset(self): filename = self.getFileName("create_vlen_dset") print("filename:", filename) @@ -149,42 +146,40 @@ def test_create_vlen_dset(self): e1 = ret_val[1] self.assertTrue(isinstance(e1, np.ndarray)) self.assertEqual(e1.shape, (0,)) - + # create numpy object array - e0 = np.array([1,2,3],dtype='uint16') - e1 = np.array([1,2,3,4],dtype='uint16') + e0 = np.array([1, 2, 3], dtype='uint16') + e1 = np.array([1, 2, 3, 4], dtype='uint16') data = np.array([e0, e1], 
dtype=dtvlen) # write data dset1[...] = data # read back data - e = dset1[0] ret_val = dset1[...] self.assertTrue(isinstance(ret_val, np.ndarray)) self.assertEqual(len(ret_val), 2) self.assertTrue(isinstance(ret_val[0], np.ndarray)) # py36 attribute[a1]: [array([0, 1, 2], dtype=int32) array([0, 1, 2, 3], dtype=int32)] # py27 [(0, 1, 2) (0, 1, 2, 3)] - self.assertEqual(list(ret_val[0]), [1,2,3]) + self.assertEqual(list(ret_val[0]), [1, 2, 3]) self.assertEqual(ret_val[0].dtype, np.dtype('uint16')) self.assertTrue(isinstance(ret_val[1], np.ndarray)) self.assertEqual(ret_val[1].dtype, np.dtype('uint16')) - self.assertEqual(list(ret_val[1]), [1,2,3,4]) + self.assertEqual(list(ret_val[1]), [1, 2, 3, 4]) # Read back just one element e0 = dset1[0] self.assertEqual(len(e0), 3) - self.assertEqual(list(e0), [1,2,3]) - + self.assertEqual(list(e0), [1, 2, 3]) + # try writing int arrays into dataset - data = [42,] + data = [42,] dset1[0] = data ret_val = dset1[...] self.assertEqual(list(ret_val[0]), [42]) - # TBD: Test for VLEN objref and compound as with attribute test above # close file @@ -196,7 +191,7 @@ def test_create_vlen_compound_dset(self): f = h5py.File(filename, "w") count = 10 - # create a dataset that is a VLEN int32 + # create a dataset that is a VLEN int32 dtvlen = h5py.special_dtype(vlen=np.dtype('int32')) dt = np.dtype([('x', np.int32), ('vals', dtvlen)]) dset = f.create_dataset('compound_vlen', (count,), dtype=dt) @@ -204,7 +199,7 @@ elem = dset[0] for i in range(count): elem['x'] = i - elem['vals'] = np.array(list(range(i+1)), dtype=np.int32) + elem['vals'] = np.array(list(range(i + 1)), dtype=np.int32) dset[i] = elem e = dset[5] @@ -212,7 +207,7 @@ self.assertEqual(e[0], 5) e1 = list(e[1]) self.assertEqual(e1, list(range(6))) - + f.close() def test_create_vlen_2d_dset(self): @@ -228,16 +223,16 @@ nrows = 2 ncols = 3 - dset1 = f.create_dataset("dset1", shape=(nrows,ncols), dtype=dtvlen) + dset1 = f.create_dataset("dset1", shape=(nrows, ncols), dtype=dtvlen) # create numpy object array - data = np.zeros((nrows,ncols), dtype=dtvlen) + data = np.zeros((nrows, ncols), dtype=dtvlen) for i in range(nrows): for j in range(ncols): alist = [] - for k in range((i+1)*(j+1)): + for k in range((i + 1) * (j + 1)): alist.append(k) - data[i,j] = np.array(alist, dtype="int32") + data[i, j] = np.array(alist, dtype="int32") # write data dset1[...] = data @@ -246,26 +241,25 @@ ret_val = dset1[...]
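
Condensed, the ragged-array round trip these VLEN dataset tests depend on (file name illustrative):

    import numpy as np
    import h5pyd as h5py

    dtvlen = h5py.special_dtype(vlen=np.dtype("int32"))
    with h5py.File("/home/test_user1/h5pyd_tests/vlen_demo.h5", "w") as f:
        dset = f.create_dataset("vlen_dset", shape=(2,), dtype=dtvlen)
        # each element stores its own variable-length int32 array
        dset[...] = np.array([np.arange(3, dtype="int32"),
                              np.arange(4, dtype="int32")], dtype=object)
        ragged = dset[...]  # object array of int32 arrays, lengths 3 and 4
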
self.assertTrue(isinstance(ret_val, np.ndarray)) self.assertEqual(ret_val.shape, (nrows, ncols)) - e12 = ret_val[1,2] + e12 = ret_val[1, 2] self.assertTrue(isinstance(e12, np.ndarray)) # py36 attribute[a1]: [array([0, 1, 2], dtype=int32) array([0, 1, 2, 3], dtype=int32)] # py27 [(0, 1, 2) (0, 1, 2, 3)] - self.assertEqual(list(e12), [0,1,2,3,4,5]) + self.assertEqual(list(e12), [0, 1, 2, 3, 4, 5]) self.assertEqual(e12.dtype, np.dtype('int32')) # Read back just one element - e12 = dset1[1,2] + e12 = dset1[1, 2] self.assertTrue(isinstance(e12, np.ndarray)) self.assertEqual(e12.shape, (6,)) # py36 attribute[a1]: [array([0, 1, 2], dtype=int32) array([0, 1, 2, 3], dtype=int32)] # py27 [(0, 1, 2) (0, 1, 2, 3)] - self.assertEqual(list(e12), [0,1,2,3,4,5]) + self.assertEqual(list(e12), [0, 1, 2, 3, 4, 5]) self.assertEqual(e12.dtype, np.dtype('int32')) # close file f.close() - def test_variable_len_str_attr(self): filename = self.getFileName("variable_len_str_dset") print("filename:", filename) @@ -280,7 +274,6 @@ def test_variable_len_str_attr(self): dt = h5py.special_dtype(vlen=bytes) f.attrs.create('a1', words, shape=dims, dtype=dt) - vals = f.attrs["a1"] # read back self.assertTrue("vlen" in vals.dtype.metadata) @@ -290,7 +283,6 @@ def test_variable_len_str_attr(self): f.close() - def test_variable_len_str_dset(self): filename = self.getFileName("variable_len_str_dset") print("filename:", filename) @@ -298,7 +290,7 @@ def test_variable_len_str_dset(self): # TBD - skipping as this core dumps in travis for some reason return f = h5py.File(filename, "w") - + dims = (10,) dt = h5py.special_dtype(vlen=bytes) dset = f.create_dataset('variable_len_str_dset', dims, dtype=dt) @@ -359,7 +351,8 @@ def test_variable_len_unicode_dset(self): self.assertEqual(dset[0], b'') - words = (u"one: \u4e00", u"two: \u4e8c", u"three: \u4e09", u"four: \u56db", u"five: \u4e94", u"six: \u516d", u"seven: \u4e03", u"eight: \u516b", u"nine: \u4e5d", u"ten: \u5341") + words = (u"one: \u4e00", u"two: \u4e8c", u"three: \u4e09", u"four: \u56db", u"five: \u4e94", + u"six: \u516d", u"seven: \u4e03", u"eight: \u516b", u"nine: \u4e5d", u"ten: \u5341") dset[:] = words vals = dset[:] # read back @@ -379,12 +372,13 @@ def test_variable_len_unicode_attr(self): dims = (10,) dt = h5py.special_dtype(vlen=str) - words = (u"one: \u4e00", u"two: \u4e8c", u"three: \u4e09", u"four: \u56db", u"five: \u4e94", u"six: \u516d", u"seven: \u4e03", u"eight: \u516b", u"nine: \u4e5d", u"ten: \u5341") + words = (u"one: \u4e00", u"two: \u4e8c", u"three: \u4e09", u"four: \u56db", u"five: \u4e94", + u"six: \u516d", u"seven: \u4e03", u"eight: \u516b", u"nine: \u4e5d", u"ten: \u5341") f.attrs.create('a1', words, shape=dims, dtype=dt) vals = f.attrs["a1"] # read back - #print("type:", type(vals)) + # print("type:", type(vals)) self.assertTrue("vlen" in vals.dtype.metadata) for i in range(10):