diff --git a/Dockerfile b/Dockerfile index 3e881bbf..9a75164b 100644 --- a/Dockerfile +++ b/Dockerfile @@ -12,13 +12,18 @@ RUN mkdir /usr/local/src/hsds/ \ /usr/local/src/hsds/hsds/util/ \ /etc/hsds/ -COPY setup.py /usr/local/src/hsds/ +COPY pyproject.toml /usr/local/src/hsds/ +COPY setup.cfg /usr/local/src/hsds/ COPY hsds/*.py /usr/local/src/hsds/hsds/ COPY hsds/util/*.py /usr/local/src/hsds/hsds/util/ COPY admin/config/config.yml /etc/hsds/ COPY admin/config/config.yml /usr/local/src/hsds/admin/config/ COPY entrypoint.sh / -RUN /bin/bash -c 'cd /usr/local/src/hsds; pip install -e ".[azure]" ; cd -' +RUN /bin/bash -c 'cd /usr/local/src/hsds; \ + pip install build;\ + python -m build;\ + pip install -v . ;\ + cd -' EXPOSE 5100-5999 ENTRYPOINT ["/bin/bash", "-c", "/entrypoint.sh"] diff --git a/README.md b/README.md index d7d4e8f6..0616fbdf 100755 --- a/README.md +++ b/README.md @@ -5,27 +5,27 @@ HSDS is a web service that implements a REST-based web service for HDF5 data stores. Data can be stored in either a POSIX files system, or using object-based storage such as AWS S3, Azure Blob Storage, or [MinIO](https://min.io). -HSDS can be run a single machine using Docker or on a cluster using Kubernetes (or AKS on Microsoft Azure). +HSDS can be run on a single machine with or without Docker or on a cluster using Kubernetes (or AKS on Microsoft Azure). In addition, HSDS can be run in serverless mode with AWS Lambda or h5pyd local mode. ## Quick Start -Make sure you have Python 3, Pip, and git installed, then: - -1. Clone this repo: `$ git clone https://github.com/HDFGroup/hsds` -2. Go to the hsds directory: `$ cd hsds` -3. Run install: `$ python setup.py install` OR install from pypi: `$ pip install hsds` -4. Setup password file: `$ cp admin/config/passwd.default admin/config/passwd.txt` -5. 
Create a directory the server will use to store data, and then set the ROOT_DIR environment variable to point to it: `$ mkdir hsds_data; export ROOT_DIR="${PWD}/hsds_data"` For Windows: `C:> set ROOT_DIR=%CD%\hsds_data` -6. Create the hsds test bucket: `$ mkdir hsds_data/hsdstest` -7. Start server: `$ ./runall.sh --no-docker` For Windows: `C:> runall.bat` -8. In a new shell, set environment variables for the admin account: `$ export ADMIN_USERNAME=admin` and `$ export ADMIN_PASSWORD=admin` (adjust for any changes made to the passwd.txt file). For Windows - use the corresponding set commands -9. Run the test suite: `$ python testall.py --skip_unit` -10. (Optional) Post install setup (test data, home folders, cli tools, etc): [docs/post_install.md](docs/post_install.md) -11. (Optional) Install the h5pyd package for an h5py compatible api and tool suite: https://github.com/HDFGroup/h5pyd - -To shut down the server, and the server was started with the --no-docker option, just control-C. +Make sure you have Python 3 and Pip installed, then: + +1. Run install: `$ ./build.sh` from source tree OR install from pypi: `$ pip install hsds` +2. Create a directory the server will use to store data, example: `$ mkdir ~/hsds_data` +3. Start server: `$ hsds --root_dir ~/hsds_data` +4. Run the test suite. In a separate terminal run: + - Set user_name: `$ export USER_NAME=$USER` + - Set user_password: `$ export USER_PASSWORD=$USER` + - Set admin name: `$ export ADMIN_USERNAME=$USER` + - Set admin password: `$ export ADMIN_PASSWORD=$USER` + - Run test suite: `$ python testall.py --skip_unit` +5. (Optional) Install the h5pyd package for an h5py compatible api and tool suite: https://github.com/HDFGroup/h5pyd +6. (Optional) Post install setup (test data, home folders, cli tools, etc): [docs/post_install.md](docs/post_install.md) + +To shut down the server when it is not running in Docker, just press control-C. 
If using docker, run: `$ ./stopall.sh` diff --git a/build.sh b/build.sh index 191ae0c7..86bd4847 100755 --- a/build.sh +++ b/build.sh @@ -27,11 +27,17 @@ if [ $run_pyflakes ]; then fi fi -echo "running setup.py" -python setup.py install +pip install --upgrade build -echo "clean stopped containers" -docker rm -v $(docker ps -aq -f status=exited) +echo "running build" +python -m build +pip install -v . -echo "building docker image" -docker build -t hdfgroup/hsds . +command -v docker +if [ $? -ne 1 ]; then + echo "clean stopped containers" + docker rm -v $(docker ps -aq -f status=exited) + + echo "building docker image" + docker build -t hdfgroup/hsds . +fi diff --git a/entrypoint.sh b/entrypoint.sh index ef8b123f..3850854f 100755 --- a/entrypoint.sh +++ b/entrypoint.sh @@ -22,10 +22,6 @@ elif [ $NODE_TYPE == "head_node" ]; then echo "running hsds-headnode" export PYTHONUNBUFFERED="1" hsds-headnode -elif [ $NODE_TYPE == "rangeget" ]; then - echo "running hsds-rangeget" - export PYTHONUNBUFFERED="1" - hsds-rangeget else echo "Unknown NODE_TYPE: " $NODE_TYPE fi diff --git a/hsds/app.py b/hsds/app.py index a88b3a2d..7d58fd9c 100644 --- a/hsds/app.py +++ b/hsds/app.py @@ -15,24 +15,33 @@ import sys import logging import time -import uuid from .hsds_app import HsdsApp +from . import config -_HELP_USAGE = "Starts hsds a REST-based service for HDF5 data." +_HELP_USAGE = "Starts HSDS, a REST-based service for HDF5 data." 
_HELP_EPILOG = """Examples: +- with a POSIX-based storage using a directory: ./hsdata for storage: + + hsds --root_dir ~/hsdata + +- with POSIX-based storage and config settings and password file: + + hsds --root_dir ~/hsdata --password-file ./admin/config/passwd.txt \ + --config_dir ./admin/config + - with minio data storage: hsds --s3-gateway http://localhost:6007 --access-key-id demo:demo --secret-access-key DEMO_PASS --password-file ./admin/config/passwd.txt - --bucket-name hsds.test -- with a POSIX-based storage for 'hsds.test' sub-folder in the './data' - folder: +- with AWS S3 storage and a bucket in the us-west-2 region: + + hsds --s3-gateway http://s3.us-west-2.amazonaws.com --access-key-id ${AWS_ACCESS_KEY_ID} \ + --secret-access-key ${AWS_SECRET_ACCESS_KEY} --password-file ./admin/config/passwd.txt - hsds --bucket-dir ./data/hsds.test """ # maximum number of characters if socket directory is given @@ -139,14 +148,13 @@ def main(): epilog=_HELP_EPILOG, ) - group = parser.add_mutually_exclusive_group(required=True) - group.add_argument( + parser.add_argument( "--root_dir", type=str, dest="root_dir", help="Directory where to store the object store data", ) - group.add_argument( + parser.add_argument( "--bucket_name", nargs=1, type=str, @@ -197,7 +205,7 @@ def main(): ) parser.add_argument( "--count", - default=1, + default=4, type=int, dest="dn_count", help="Number of dn sub-processes to create.", @@ -241,16 +249,25 @@ def main(): print(f"unsupported log_level: {log_level_cfg}, using INFO instead") log_level = logging.INFO - print("set logging to:", log_level) + print("set logging to::", log_level) logging.basicConfig(level=log_level) userConfig = UserConfig() - # set username based on command line, .hscfg, $USER, or $JUPYTERHUB_USER + login_username = None + try: + login_username = os.getlogin() + except OSError: + pass # ignore + + # set username based on command line, .hscfg, or login user if args.hs_username: username = args.hs_username elif 
"HS_USERNAME" in userConfig: username = userConfig["HS_USERNAME"] + elif not args.password_file: + # no password file, add the login name as user + username = login_username else: username = None @@ -260,7 +277,7 @@ def main(): elif "HS_PASSWORD" in userConfig: password = userConfig["HS_PASSWORD"] else: - password = "1234" + password = login_username if username: kwargs["username"] = username @@ -271,38 +288,23 @@ def main(): sys.exit(f"password file: {args.password_file} not found") kwargs["password_file"] = args.password_file - if args.host: - # use TCP connect - kwargs["host"] = args.host + # use unix domain socket if a socket dir is set + if args.socket_dir: + socket_dir = os.path.abspath(args.socket_dir) + if not os.path.isdir(socket_dir): + raise FileNotFoundError(f"directory: {socket_dir} not found") + kwargs["socket_dir"] = socket_dir + else: + # USE TCP connect + if args.host: + kwargs["host"] = args.host + else: + kwargs["host"] = "localhost" # sn_port only relevant for TCP connections if args.port: kwargs["sn_port"] = args.port else: kwargs["sn_port"] = 5101 # TBD - use config - else: - # choose a tmp directory for socket if one is not provided - if args.socket_dir: - socket_dir = os.path.abspath(args.socket_dir) - if not os.path.isdir(socket_dir): - raise FileNotFoundError(f"directory: {socket_dir} not found") - else: - if "TMP" in os.environ: - # This should be set at least on Windows - tmp_dir = os.environ["TMP"] - print("set tmp_dir:", tmp_dir) - else: - tmp_dir = "/tmp" - if not os.path.isdir(tmp_dir): - raise FileNotFoundError(f"directory {tmp_dir} not found") - rand_name = uuid.uuid4().hex[:8] - socket_dir = os.path.join(tmp_dir, f"hs{rand_name}") - print("using socket dir:", socket_dir) - if len(socket_dir) > MAX_SOCKET_DIR_PATH_LEN: - raise ValueError( - f"length of socket_dir must be less than: {MAX_SOCKET_DIR_PATH_LEN}" - ) - os.mkdir(socket_dir) - kwargs["socket_dir"] = socket_dir if args.logfile: logfile = os.path.abspath(args.logfile) @@ 
-329,6 +331,27 @@ def main(): if args.dn_count: kwargs["dn_count"] = args.dn_count + if args.bucket_name: + bucket_name = args.bucket_name + else: + bucket_name = config.get("bucket_name") + if not bucket_name: + sys.exit("bucket_name not set") + if args.root_dir: + root_dir = args.root_dir + else: + root_dir = config.get("root_dir") + if not root_dir: + # check that AWS_S3_GATEWAY or AZURE_CONNECTION_STRING is set + if not config.get("aws_s3_gateway") and not config.get("azure_connection_string"): + sys.exit("root_dir not set (and no S3 or Azure connection info)") + else: + if not os.path.isdir(root_dir): + sys.exit(f"directory: {root_dir} not found") + bucket_path = os.path.join(root_dir, bucket_name) + if not os.path.isdir(bucket_path): + os.mkdir(bucket_path) + app = HsdsApp(**kwargs) app.run() diff --git a/hsds/basenode.py b/hsds/basenode.py index 3525c5a6..8dbd9799 100644 --- a/hsds/basenode.py +++ b/hsds/basenode.py @@ -33,7 +33,7 @@ from .util.k8sClient import getDnLabelSelector, getPodIps from . 
import hsds_logger as log -HSDS_VERSION = "0.8.1" +HSDS_VERSION = "0.8.2" def getVersion(): diff --git a/hsds/config.py b/hsds/config.py index d640c276..bbbf4a95 100755 --- a/hsds/config.py +++ b/hsds/config.py @@ -42,13 +42,21 @@ def getCmdLineArg(x): # return value of command-line option # use "--x=val" to set option 'x' to 'val' # use "--x" for boolean flags + option = "--" + x + "=" for i in range(1, len(sys.argv)): arg = sys.argv[i] + if i < len(sys.argv) - 1: + next_arg = sys.argv[i + 1] + else: + next_arg = None if arg == "--" + x: - # boolean flag debug(f"got cmd line flag for {x}") - return True + if next_arg is None or next_arg.startswith("-"): + # treat as a boolean flag + return True + else: + return next_arg elif arg.startswith(option): # found an override nlen = len(option) @@ -69,6 +77,7 @@ def _load_cfg(): config_dir = getCmdLineArg("config_dir") if config_dir: + eprint("got command line arg for config_dir:", config_dir) config_dirs.append(config_dir) if not config_dirs and "CONFIG_DIR" in os.environ: config_dirs.append(os.environ["CONFIG_DIR"]) diff --git a/hsds/domain_sn.py b/hsds/domain_sn.py index 78153de6..ee82e28b 100755 --- a/hsds/domain_sn.py +++ b/hsds/domain_sn.py @@ -1123,11 +1123,6 @@ async def PUT_Domain(request): else: is_toplevel = False - if is_toplevel and not is_folder: - msg = "Only folder domains can be created at the top-level" - log.warn(msg) - raise HTTPBadRequest(reason=msg) - if is_toplevel and not isAdminUser(app, username): msg = "creation of top-level domains is only supported by admin users" log.warn(msg) @@ -1164,7 +1159,7 @@ async def PUT_Domain(request): linked_json = await getDomainJson(app, l_d, reload=True) log.debug(f"got linked json: {linked_json}") if "root" not in linked_json: - msg = "Folder domains cannot ber used as link target" + msg = "Folder domains cannot be used as link target" log.warn(msg) raise HTTPBadRequest(reason=msg) root_id = linked_json["root"] diff --git a/hsds/hsds_app.py b/hsds/hsds_app.py 
index 1c5740ce..5924d722 100644 --- a/hsds/hsds_app.py +++ b/hsds/hsds_app.py @@ -274,6 +274,8 @@ def run(self): pargs = [py_exe, cmd_path, "--node_type=sn", "--log_prefix=sn "] if self._username: pargs.append(f"--hs_username={self._username}") + # make this user admin + pargs.append(f"--admin_user={self._username}") if self._password: pargs.append(f"--hs_password={self._password}") if self._password_file: diff --git a/hsds/servicenode_lib.py b/hsds/servicenode_lib.py index 40a4d596..3e1e2946 100644 --- a/hsds/servicenode_lib.py +++ b/hsds/servicenode_lib.py @@ -270,6 +270,10 @@ async def getObjectIdByPath(app, obj_id, h5path, bucket=None, refresh=False, dom # find domain object is stored under domain = link_json["h5domain"] + if domain.startswith("hdf5:/"): + # strip off prefix + domain = domain[6:] + if bucket: domain = bucket + domain diff --git a/hsds/util/domainUtil.py b/hsds/util/domainUtil.py index 1db92a5b..3659b9e5 100644 --- a/hsds/util/domainUtil.py +++ b/hsds/util/domainUtil.py @@ -213,6 +213,10 @@ def getDomainFromRequest(request, validate=True, allow_dns=True): if not domain: raise ValueError("no domain") + if domain.startswith("hdf5:/"): + # strip off the prefix to make following logic easier + domain = domain[6:] + if domain[0] != "/": # DNS style hostname if validate: diff --git a/hsds/util/s3Client.py b/hsds/util/s3Client.py index 9b1b44ba..f2ddd7cc 100644 --- a/hsds/util/s3Client.py +++ b/hsds/util/s3Client.py @@ -145,6 +145,7 @@ def _get_client_kwargs(self): kwargs["endpoint_url"] = self._s3_gateway kwargs["use_ssl"] = self._use_ssl kwargs["config"] = self._aio_config + log.debug(f"s3 kwargs: {kwargs}") return kwargs def _renewToken(self): diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 00000000..8b526a15 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,90 @@ +[build-system] +requires = [ + "setuptools >=61", +] +build-backend = "setuptools.build_meta" + +[project] +name = "hsds" +description = "Rest-based Web Service for 
HDF data" +authors = [ + {name = "John Readey", email = "jreadey@hdfgroup.org"}, +] +maintainers = [ + {name = "John Readey", email = "jreadey@hdfgroup.org"}, +] +license = {text = "Apache V2"} +classifiers = [ + "Development Status :: 5 - Production/Stable", + "Intended Audience :: Developers", + "Intended Audience :: Information Technology", + "Intended Audience :: Science/Research", + "License :: OSI Approved :: Apache Software License", + "Operating System :: Unix", + "Operating System :: POSIX :: Linux", + "Operating System :: MacOS :: MacOS X", + "Operating System :: Microsoft :: Windows", + "Programming Language :: Python", + "Programming Language :: Python :: 3", + "Topic :: Scientific/Engineering", + "Topic :: Database", + "Topic :: Software Development :: Libraries :: Python Modules", +] +requires-python = ">=3.8" +version = "0.8.2" + +dependencies = [ + "aiohttp == 3.8.5", + "aiobotocore == 2.5.0", + "aiohttp_cors", + "aiofiles", + "botocore", + "cryptography", + "numcodecs", + "numpy", + "psutil", + "pyjwt", + "pytz", + "pyyaml", + "requests-unixsocket", + "simplejson", +] + +[project.optional-dependencies] +azure = ["azure-storage-blob"] + +[project.readme] +text = """\ +HSDS is a web service that implements a REST-based web service for HDF5 data stores. +Data can be stored in either a POSIX file system, or using object-based storage such as +AWS S3, Azure Blob Storage, or [MinIO](https://min.io). +HSDS can be run on a single machine or on a cluster using Kubernetes (or AKS on Microsoft Azure). + +In addition, HSDS can be run in serverless mode with AWS Lambda or h5pyd local mode. 
+""" +content-type = "text/markdown" + +[project.urls] +Homepage = "https://www.hdfgroup.org/solutions/highly-scalable-data-service-hsds/" +Source = "https://github.com/HDFGroup/hsds" +Documentation = "https://github.com/HDFGroup/hsds/docs" +"Release notes" = "https://github.com/HDFGroup/hsds/releases/tag/v0.8.0" +"Discussion forum" = "https://forum.hdfgroup.org/c/hsds/" + +[tool.setuptools] +packages = ["hsds", "hsds.util", "admin"] +include-package-data = false + +[tool.setuptools.package-data] +# to ignore .pxd and .pyx files in wheels +#"*" = ["*.yml"] +admin = ["config/config.yml"] + +[project.scripts] +hsds = "hsds.app:main" +hsds-datanode = "hsds.datanode:main" +hsds-servicenode = "hsds.servicenode:main" +hsds-headnode = "hsds.headnode:main" +hsds-node = "hsds.node_runner:main" +hsds-chunklocator = "hsds.chunklocator:main" + diff --git a/setup.py b/setup.py deleted file mode 100644 index 2d2a748a..00000000 --- a/setup.py +++ /dev/null @@ -1,85 +0,0 @@ -from setuptools import setup - -# run: -# setup.py install -# or (if you'll be modifying the package): -# setup.py develop -# To use a consistent encoding -# To upload to PyPI: -# twine upload dist/* -# -# Tag the release in github! -# - -classifiers = [ - "Environment :: Console", - "Intended Audience :: Information Technology", - "Intended Audience :: Science/Research", - "License :: OSI Approved :: Apache Software License", - "Natural Language :: English", - "Operating System :: POSIX :: Linux", - "Programming Language :: Python :: 3", - "Topic :: Internet :: WWW/HTTP :: HTTP Servers", - "Topic :: Scientific/Engineering", -] - -# -# The specific versions given below are not desirable, -# but there are needed given aiobotocore and botocore -# requirements (e.g. 
see: https://github.com/boto/botocore/issues/2926#issuecomment-1535456104) -# as well as matching with packages present -# in the Docker build base image -# -install_requires = [ - "aiohttp==3.8.4", - "aiobotocore==2.5.0", - "aiohttp_cors", - "aiofiles", - "botocore", - "cryptography", - "numcodecs", - "numpy", - "psutil", - "pyjwt", - "pytz", - "pyyaml", - "requests-unixsocket", - "simplejson", -] - - -setup( - name="hsds", - version="0.8.1", - description="HDF REST API", - url="http://github.com/HDFGroup/hsds", - author="John Readey", - author_email="jreadey@hdfgrouup.org", - license="Apache", - packages=["hsds", "hsds.util", "admin"], - install_requires=install_requires, - setup_requires=["setuptools"], - extras_require={"azure": ["azure-storage-blob"]}, - zip_safe=False, - classifiers=classifiers, - include_package_data=True, - data_files=[ - ( - "admin", - [ - "admin/config/config.yml", - ], - ) - ], - entry_points={ - "console_scripts": [ - "hsds = hsds.app:main", - "hsds-datanode = hsds.datanode:main", - "hsds-servicenode = hsds.servicenode:main", - "hsds-headnode = hsds.headnode:main", - "hsds-rangeget = hsds.rangeget_proxy:main", - "hsds-node = hsds.node_runner:main", - "hsds-chunklocator = hsds.chunklocator:main" - ] - }, -) diff --git a/tests/integ/acl_test.py b/tests/integ/acl_test.py index ef0f943a..22833a3b 100644 --- a/tests/integ/acl_test.py +++ b/tests/integ/acl_test.py @@ -103,9 +103,12 @@ def testGetAcl(self): # try fetching an ACL from a user who doesn't have readACL permissions req = helper.getEndpoint() + "/acls/" + username user2name = config.get("user2_name") - headers = helper.getRequestHeaders(domain=self.base_domain, username=user2name) - rsp = self.session.get(req, headers=headers) - self.assertEqual(rsp.status_code, 403) # forbidden + if user2name: + headers = helper.getRequestHeaders(domain=self.base_domain, username=user2name) + rsp = self.session.get(req, headers=headers) + self.assertEqual(rsp.status_code, 403) # forbidden + else: + 
print("user2_name not set") def testGetAcls(self): print("testGetAcls", self.base_domain) @@ -224,9 +227,12 @@ def testGetAcls(self): # try fetching ACLs from a user who doesn't have readACL permissions req = helper.getEndpoint() + "/acls" user2name = config.get("user2_name") - headers = helper.getRequestHeaders(domain=self.base_domain, username=user2name) - rsp = self.session.get(req, headers=headers) - self.assertEqual(rsp.status_code, 403) # forbidden + if user2name: + headers = helper.getRequestHeaders(domain=self.base_domain, username=user2name) + rsp = self.session.get(req, headers=headers) + self.assertEqual(rsp.status_code, 403) # forbidden + else: + print("user2name not set") def testPutAcl(self): print("testPutAcl", self.base_domain) @@ -234,6 +240,9 @@ def testPutAcl(self): # create an ACL for "test_user2" with read and update access user2name = config.get("user2_name") + if not user2name: + print("user2_name not set") + return req = helper.getEndpoint() + "/acls/" + user2name perm = {"read": True, "update": True} diff --git a/tests/integ/attr_test.py b/tests/integ/attr_test.py index c8fc172c..072d25f4 100644 --- a/tests/integ/attr_test.py +++ b/tests/integ/attr_test.py @@ -14,6 +14,7 @@ import json import numpy as np import helper +import config class AttributeTest(unittest.TestCase): @@ -238,11 +239,15 @@ def testObjAttr(self): self.assertEqual(rsp.status_code, 404) # not found # try adding the attribute as a different user - headers = helper.getRequestHeaders( - domain=self.base_domain, username="test_user2" - ) - rsp = self.session.put(req, data=json.dumps(attr_payload), headers=headers) - self.assertEqual(rsp.status_code, 403) # forbidden + user2_name = config.get("user2_name") + if user2_name: + headers = helper.getRequestHeaders( + domain=self.base_domain, username="test_user2" + ) + rsp = self.session.put(req, data=json.dumps(attr_payload), headers=headers) + self.assertEqual(rsp.status_code, 403) # forbidden + else: + print("user2_name not set") 
# try adding again with original user, but outside this domain another_domain = helper.getParentDomain(self.base_domain) diff --git a/tests/integ/config.py b/tests/integ/config.py index f8ad3194..4e07228f 100755 --- a/tests/integ/config.py +++ b/tests/integ/config.py @@ -14,12 +14,12 @@ cfg = { "hsds_endpoint": "http://localhost:5101", # or 'http+unix://%2Ftmp%2Fhs%2Fsn_1.sock' for socket - "head_endpoint": "http://localhost:5100", - "rangeget_endpoint": "http://localhost:6900", "user_name": "test_user1", "user_password": "test", - "user2_name": "test_user2", - "user2_password": "test", + "user2_name": "", + "user2_password": "", + "admin_username": "", + "admin_password": "", "test_noauth": True, "default_public": False, # set to true if the server makes new domains publicly readable "bucket_name": "", # bucket name to be used for requests diff --git a/tests/integ/dataset_test.py b/tests/integ/dataset_test.py index ff50d138..0f6b930c 100755 --- a/tests/integ/dataset_test.py +++ b/tests/integ/dataset_test.py @@ -134,14 +134,18 @@ def testScalarDataset(self): # self.assertTrue("allocated_size" in rspJson) # try get with a different user (who has read permission) - headers = helper.getRequestHeaders(domain=domain, username="test_user2") - rsp = self.session.get(req, headers=headers) - if config.get("default_public"): - self.assertEqual(rsp.status_code, 200) - rspJson = json.loads(rsp.text) - self.assertEqual(rspJson["id"], dset_id) + user2_name = config.get('user2_name') + if user2_name: + headers = helper.getRequestHeaders(domain=domain, username=user2_name) + rsp = self.session.get(req, headers=headers) + if config.get("default_public"): + self.assertEqual(rsp.status_code, 200) + rspJson = json.loads(rsp.text) + self.assertEqual(rspJson["id"], dset_id) + else: + self.assertEqual(rsp.status_code, 403) else: - self.assertEqual(rsp.status_code, 403) + print('user2_name not set') # try to do a GET with a different domain (should fail) another_domain = self.base_domain 
+ "/testScalarDataset2.h5" @@ -152,9 +156,12 @@ def testScalarDataset(self): self.assertEqual(rsp.status_code, 400) # try DELETE with user who doesn't have create permission on this domain - headers = helper.getRequestHeaders(domain=domain, username="test_user2") - rsp = self.session.delete(req, headers=headers) - self.assertEqual(rsp.status_code, 403) # forbidden + if user2_name: + headers = helper.getRequestHeaders(domain=domain, username=user2_name) + rsp = self.session.delete(req, headers=headers) + self.assertEqual(rsp.status_code, 403) # forbidden + else: + print("user2_name not set") # try to do a DELETE with a different domain (should fail) # Test creation/deletion of scalar dataset obj @@ -475,9 +482,13 @@ def testDelete(self): self.assertEqual(rspJson["id"], dset_id) # try DELETE with user who doesn't have create permission on this domain - headers = helper.getRequestHeaders(domain=domain, username="test_user2") - rsp = self.session.delete(req, headers=headers) - self.assertEqual(rsp.status_code, 403) # forbidden + user2_name = config.get('user2_name') + if user2_name: + headers = helper.getRequestHeaders(domain=domain, username=user2_name) + rsp = self.session.delete(req, headers=headers) + self.assertEqual(rsp.status_code, 403) # forbidden + else: + print("test_user2 not set") # try to do a DELETE with a different domain (should fail) another_domain = helper.getParentDomain(domain) diff --git a/tests/integ/datatype_test.py b/tests/integ/datatype_test.py index 788cdee9..deb576a4 100755 --- a/tests/integ/datatype_test.py +++ b/tests/integ/datatype_test.py @@ -72,16 +72,26 @@ def testCommittedType(self): self.assertEqual(type_json["base"], "H5T_IEEE_F32LE") # try get with a different user (who has read permission) - headers = helper.getRequestHeaders( - domain=self.base_domain, username="test_user2" - ) - rsp = self.session.get(req, headers=headers) - if config.get("default_public"): - self.assertEqual(rsp.status_code, 200) - rspJson = json.loads(rsp.text) 
- self.assertEqual(rspJson["root"], root_uuid) + test_user2 = config.get("user2_name") # some tests will be skipped if not + if test_user2: + headers = helper.getRequestHeaders( + domain=self.base_domain, username="test_user2" + ) + rsp = self.session.get(req, headers=headers) + if config.get("default_public"): + self.assertEqual(rsp.status_code, 200) + rspJson = json.loads(rsp.text) + self.assertEqual(rspJson["root"], root_uuid) + else: + self.assertEqual(rsp.status_code, 403) + # try DELETE with user who doesn't have create permission on this domain + headers = helper.getRequestHeaders( + domain=self.base_domain, username="test_user2" + ) + rsp = self.session.delete(req, headers=headers) + self.assertEqual(rsp.status_code, 403) # forbidden else: - self.assertEqual(rsp.status_code, 403) + print('test_user2 not set') # try to do a GET with a different domain (should fail) another_domain = helper.getParentDomain(self.base_domain) @@ -89,13 +99,6 @@ def testCommittedType(self): rsp = self.session.get(req, headers=headers) self.assertEqual(rsp.status_code, 400) - # try DELETE with user who doesn't have create permission on this domain - headers = helper.getRequestHeaders( - domain=self.base_domain, username="test_user2" - ) - rsp = self.session.delete(req, headers=headers) - self.assertEqual(rsp.status_code, 403) # forbidden - # try to do a DELETE with a different domain (should fail) another_domain = helper.getParentDomain(self.base_domain) headers = helper.getRequestHeaders(domain=another_domain) diff --git a/tests/integ/domain_test.py b/tests/integ/domain_test.py index 441b2eed..ac1721b4 100755 --- a/tests/integ/domain_test.py +++ b/tests/integ/domain_test.py @@ -174,6 +174,15 @@ def testGetDomain(self): root_uuid_3 = rspJson["root"] self.assertEqual(root_uuid, root_uuid_3) + # "domain" param with "hdf5://" prefix + params = {"domain": f"hdf5:/{domain}"} + rsp = self.session.get(req, params=params, headers=headers) + self.assertEqual(rsp.status_code, 200) + 
self.assertEqual(rsp.headers["content-type"], "application/json; charset=utf-8") + rspJson = json.loads(rsp.text) + root_uuid_3 = rspJson["root"] + self.assertEqual(root_uuid, root_uuid_3) + # verify that request with invalid domain fails domain = domain[1:] # strip off the '/' params = {"domain": domain} @@ -272,6 +281,7 @@ def testGetTopLevelDomain(self): domain = "/home" print("testGetTopLevelDomain", domain) headers = helper.getRequestHeaders(domain=domain) + user_name = config.get("user_name") req = helper.getEndpoint() + "/" rsp = self.session.get(req, headers=headers) @@ -282,7 +292,7 @@ def testGetTopLevelDomain(self): self.assertTrue("hrefs" in rspJson) self.assertTrue("class" in rspJson) self.assertEqual(rspJson["class"], "folder") - domain = "test_user1.home" + domain = f"{user_name}.home" headers = helper.getRequestHeaders(domain=domain) req = helper.getEndpoint() + "/" @@ -555,6 +565,11 @@ def testCreateFolder(self): def testAclInheritence(self): # this test is here (rather than acl_test.py) since we need to create domains in a folder print("testAclInheritence", self.base_domain) + user2name = config.get("user2_name") + if not user2name: + print("user2_name not set, skipping test") + return + folder = self.base_domain + "/a_folder" headers = helper.getRequestHeaders(domain=folder) req = helper.getEndpoint() + "/" @@ -564,7 +579,6 @@ def testAclInheritence(self): default_public = config.get("default_public") # create an ACL for "test_user2" with read and update access - user2name = config.get("user2_name") req = helper.getEndpoint() + "/acls/" + user2name perm = {"read": True, "update": True} @@ -800,9 +814,12 @@ def testDeleteDomain(self): # try deleting the domain with a user who doesn't have permissions' user2_name = config.get("user2_name") - headers = helper.getRequestHeaders(domain=self.base_domain, username=user2_name) - rsp = self.session.delete(req, headers=headers) - self.assertEqual(rsp.status_code, 403) # forbidden + if user2_name: + headers 
= helper.getRequestHeaders(domain=self.base_domain, username=user2_name) + rsp = self.session.delete(req, headers=headers) + self.assertEqual(rsp.status_code, 403) # forbidden + else: + print("user2_name not set") # delete the domain (with the orginal user) headers = helper.getRequestHeaders(domain=domain) @@ -826,41 +843,49 @@ def testDeleteDomain(self): rsp = self.session.get(root_req, headers=headers) self.assertEqual(rsp.status_code, 200) - # delete the domain with the admin account - try: - admin_username = config.get("admin_username") + admin_username = config.get("admin_username") + if admin_username: + # delete the domain with the admin account admin_passwd = config.get("admin_password") headers = helper.getRequestHeaders( domain=domain, username=admin_username, password=admin_passwd ) rsp = self.session.delete(req, headers=headers) self.assertEqual(rsp.status_code, 200) - except KeyError: + else: msg = "Skipping admin delete test, set ADMIN_USERNAME and ADMIN_PASSWORD" msg += "environment variables to enable" print(msg) - # try creating a folder using the owner flag - try: - admin_username = config.get("admin_username") - admin_passwd = config.get("admin_password") - username = config.get("user2_name") - new_domain = f"{self.base_domain}/{username}_folder" - body = {"folder": True, "owner": username} - headers = helper.getRequestHeaders( - domain=new_domain, username=admin_username, password=admin_passwd - ) - rsp = self.session.put(req, headers=headers, data=json.dumps(body)) - self.assertEqual(rsp.status_code, 201) - - headers = helper.getRequestHeaders(domain=new_domain, username=username) - rsp = self.session.get(req, headers=headers) - self.assertEqual(rsp.status_code, 200) - rspJson = json.loads(rsp.text) - except KeyError: - msg = "Skipping domain create with owner test, set ADMIN_USERNAME" - msg += " and ADMIN_PASSWORD environment variables to enable" - print(msg) + username = config.get("user2_name") + admin_username = config.get("admin_username") 
+ + if username and admin_username: + + # try creating a folder using the owner flag + try: + admin_passwd = config.get("admin_password") + new_domain = f"{self.base_domain}/{username}_folder" + body = {"folder": True, "owner": username} + headers = helper.getRequestHeaders( + domain=new_domain, username=admin_username, password=admin_passwd + ) + rsp = self.session.put(req, headers=headers, data=json.dumps(body)) + self.assertEqual(rsp.status_code, 201) + + headers = helper.getRequestHeaders(domain=new_domain, username=username) + rsp = self.session.get(req, headers=headers) + self.assertEqual(rsp.status_code, 200) + rspJson = json.loads(rsp.text) + except KeyError: + msg = "Skipping domain create with owner test, set ADMIN_USERNAME" + msg += " and ADMIN_PASSWORD environment variables to enable" + print(msg) + else: + if not username: + print("user2_name not set") + elif not admin_username: + print("admin_username not set") def testDomainCollections(self): domain = helper.getTestDomain("tall.h5") diff --git a/tests/integ/group_test.py b/tests/integ/group_test.py index 31b31aa3..eebd1b02 100755 --- a/tests/integ/group_test.py +++ b/tests/integ/group_test.py @@ -60,16 +60,20 @@ def testGetRootGroup(self): self.assertTrue("attributeCount" in rspJson) # try get with a different user (who has read permission) - headers = helper.getRequestHeaders( - domain=self.base_domain, username="test_user2" - ) - rsp = self.session.get(req, headers=headers) - if config.get("default_public"): - self.assertEqual(rsp.status_code, 200) - rspJson = json.loads(rsp.text) - self.assertEqual(rspJson["root"], root_uuid) + user2_name = config.get("user2_name") + if user2_name: + headers = helper.getRequestHeaders( + domain=self.base_domain, username=user2_name + ) + rsp = self.session.get(req, headers=headers) + if config.get("default_public"): + self.assertEqual(rsp.status_code, 200) + rspJson = json.loads(rsp.text) + self.assertEqual(rspJson["root"], root_uuid) + else: + 
self.assertEqual(rsp.status_code, 403) else: - self.assertEqual(rsp.status_code, 403) + print("user2_name not set") # try to do a GET with a different domain (should fail) another_domain = helper.getParentDomain(self.base_domain) @@ -217,6 +221,11 @@ def testPost(self): self.assertEqual(rspJson["alias"], []) # try POST with user who doesn't have create permission on this domain + test_user2 = config.get("user2_name") # some tests will be skipped if not set + if not test_user2: + print("test_user2 not set") + return + headers = helper.getRequestHeaders( domain=self.base_domain, username="test_user2" ) @@ -385,11 +394,15 @@ def testDelete(self): # self.assertEqual(rspJson["domain"], self.base_domain) #TBD # try DELETE with user who doesn't have create permission on this domain - headers = helper.getRequestHeaders( - domain=self.base_domain, username="test_user2" - ) - rsp = self.session.delete(req, headers=headers) - self.assertEqual(rsp.status_code, 403) # forbidden + test_user2 = config.get("user2_name") # some tests will be skipped if not set + if test_user2: + headers = helper.getRequestHeaders( + domain=self.base_domain, username="test_user2" + ) + rsp = self.session.delete(req, headers=headers) + self.assertEqual(rsp.status_code, 403) # forbidden + else: + print("test_user2 not set") # try to do a DELETE with a different domain (should fail) another_domain = helper.getParentDomain(self.base_domain) diff --git a/tests/integ/link_test.py b/tests/integ/link_test.py index c06ecdfa..02392cdb 100755 --- a/tests/integ/link_test.py +++ b/tests/integ/link_test.py @@ -15,6 +15,7 @@ import json import uuid import helper +import config class LinkTest(unittest.TestCase): @@ -36,6 +37,7 @@ def testHardLink(self): helper.setupDomain(domain) headers = helper.getRequestHeaders(domain=domain) req = helper.getEndpoint() + "/" + test_user2 = config.get("user2_name") # some tests will be skipped if not set rsp = self.session.get(req, headers=headers) 
self.assertEqual(rsp.status_code, 200) @@ -66,12 +68,16 @@ def testHardLink(self): self.assertEqual(rsp.status_code, 404) # link doesn't exist yet # try creating a link with a different user (should fail) - headers = helper.getRequestHeaders(domain=domain, username="test_user2") - payload = {"id": grp1_id} - rsp = self.session.put(req, data=json.dumps(payload), headers=headers) - self.assertEqual(rsp.status_code, 403) # forbidden + if test_user2: + headers = helper.getRequestHeaders(domain=domain, username=test_user2) + payload = {"id": grp1_id} + rsp = self.session.put(req, data=json.dumps(payload), headers=headers) + self.assertEqual(rsp.status_code, 403) # forbidden + else: + print("test_user2 name not set") # create "/g1" with original user + payload = {"id": grp1_id} headers = helper.getRequestHeaders(domain=domain) rsp = self.session.put(req, data=json.dumps(payload), headers=headers) self.assertEqual(rsp.status_code, 201) # created @@ -107,10 +113,13 @@ def testHardLink(self): self.assertEqual(rspJson["linkCount"], 1) # link count is 1 # try deleting link with a different user (should fail) - headers = helper.getRequestHeaders(domain=domain, username="test_user2") - req = helper.getEndpoint() + "/groups/" + root_id + "/links/" + link_title - rsp = self.session.delete(req, headers=headers) - self.assertEqual(rsp.status_code, 403) # forbidden + if test_user2: + headers = helper.getRequestHeaders(domain=domain, username=test_user2) + req = helper.getEndpoint() + "/groups/" + root_id + "/links/" + link_title + rsp = self.session.delete(req, headers=headers) + self.assertEqual(rsp.status_code, 403) # forbidden + else: + print("user2_name not set") # delete the link with original user req = helper.getEndpoint() + "/groups/" + root_id + "/links/" + link_title @@ -674,7 +683,7 @@ def testExternalLinkTraversal(self): group_id = rspJson["id"] self.assertTrue(helper.validateId(group_id)) - # create hard link to external group + # create hard link to group link_title = 
"external_group" req = helper.getEndpoint() + "/groups/" + root_id_2 + "/links/" + link_title headers = helper.getRequestHeaders(domain=second_domain) @@ -711,14 +720,47 @@ def testExternalLinkTraversal(self): # make a request by path with external_link along the way # request without 'follow external links' param should receive 400 headers = helper.getRequestHeaders(domain=domain) - req = helper.getEndpoint() + "/" + "?h5path=/external_link_to_group/child_group" - rsp = self.session.get(req, headers=headers) + h5path = f"/{link_title}/child_group" + req = helper.getEndpoint() + "/" + params = {"h5path": h5path} + rsp = self.session.get(req, headers=headers, params=params) self.assertEqual(rsp.status_code, 400) + params["follow_external_links"] = 1 + rsp = self.session.get(req, headers=headers, params=params) + self.assertEqual(rsp.status_code, 200) + rspJson = json.loads(rsp.text) + keys = ["domain", "linkCount", "attributeCount", "id"] + for k in keys: + self.assertTrue(k in rspJson) + + self.assertEqual(rspJson["id"], child_group_id) + self.assertTrue(helper.validateId(rspJson["id"])) + self.assertEqual(rspJson["domain"], second_domain) + self.assertEqual(rspJson["linkCount"], 0) + self.assertEqual(rspJson["attributeCount"], 0) + self.assertEqual(rspJson["class"], "group") + + # create external link with same target but using "hdf5://" prefix + target_path = "/external_group" + link_title = "external_link_to_group_prefix" + req = helper.getEndpoint() + "/groups/" + root_id + "/links/" + link_title + payload = {"h5path": target_path, "h5domain": f"hdf5:/{second_domain}"} headers = helper.getRequestHeaders(domain=domain) - req = helper.getEndpoint() + "/" + "?h5path=/external_link_to_group/child_group" \ - + "&follow_external_links=1" - rsp = self.session.get(req, headers=headers) + rsp = self.session.put(req, data=json.dumps(payload), headers=headers) + self.assertEqual(rsp.status_code, 201) + + # make a request by path with external_link along the way + # 
request without 'follow external links' param should receive 400 + headers = helper.getRequestHeaders(domain=domain) + h5path = f"/{link_title}/child_group" + req = helper.getEndpoint() + "/" + params = {"h5path": h5path} + rsp = self.session.get(req, headers=headers, params=params) + self.assertEqual(rsp.status_code, 400) + + params["follow_external_links"] = 1 + rsp = self.session.get(req, headers=headers, params=params) self.assertEqual(rsp.status_code, 200) rspJson = json.loads(rsp.text) keys = ["domain", "linkCount", "attributeCount", "id"] diff --git a/tests/integ/rangeget_test.py b/tests/integ/rangeget_test.py deleted file mode 100755 index 8de68297..00000000 --- a/tests/integ/rangeget_test.py +++ /dev/null @@ -1,123 +0,0 @@ -############################################################################## -# Copyright by The HDF Group. # -# All rights reserved. # -# # -# This file is part of HSDS (HDF5 Scalable Data Service), Libraries and # -# Utilities. The full HSDS copyright notice, including # -# terms governing use, modification, and redistribution, is contained in # -# the file COPYING, which can be found at the root of the source code # -# distribution tree. If you do not have access to this file, you may # -# request a copy from help@hdfgroup.org. 
# -############################################################################## -import unittest -import helper -import config - -# min/max chunk size - these can be set by config, but -# practially the min config value should be larger than -# CHUNK_MIN and the max config value should less than -# CHUNK_MAX -CHUNK_MIN = 1024 # lower limit (1024b) -CHUNK_MAX = 50 * 1024 * 1024 # upper limit (50M) - - -class RangeGetTest(unittest.TestCase): - def __init__(self, *args, **kwargs): - super(RangeGetTest, self).__init__(*args, **kwargs) - self.endpoint = helper.getRangeGetEndpoint() - - def setUp(self): - self.session = helper.getSession() - - def tearDown(self): - if self.session: - self.session.close() - - # main - - def testRangeGetBytes(self): - print("testRangeGetBytes") - - hdf5_sample_bucket = config.get("hdf5_sample_bucket") - if not hdf5_sample_bucket: - print("hdf5_sample_bucket config not set, skipping testRangeGetBytes") - return - - tall_json = helper.getHDF5JSON("tall.json") - if not tall_json: - print("tall.json file not found, skipping testRangeGetBytes") - return - - if "tall.h5" not in tall_json: - self.assertTrue(False) - - chunk_info = tall_json["tall.h5"] - if "/g1/g1.1/dset1.1.2" not in chunk_info: - self.assertTrue(False) - - dset112_info = chunk_info["/g1/g1.1/dset1.1.2"] - if "byteStreams" not in dset112_info: - self.assertTrue(False) - byteStreams = dset112_info["byteStreams"] - - # should be just one element for this contiguous dataset - self.assertTrue(len(byteStreams), 1) - byteStream = byteStreams[0] - dset112_offset = byteStream["file_offset"] - dset112_size = byteStream["size"] - self.assertEqual(dset112_size, 80) - - req_headers = {"accept": "application/octet-stream"} - req = self.endpoint + "/" - - params = {} - params["bucket"] = hdf5_sample_bucket - params["key"] = "data/hdf5test/tall.h5" - params["offset"] = dset112_offset - params["length"] = dset112_size - rsp = self.session.get(req, headers=req_headers, params=params) - 
self.assertEqual(rsp.status_code, 200) - - self.assertEqual(rsp.headers["Content-Type"], "application/octet-stream") - data = rsp.content - self.assertEqual(len(data), dset112_size) - # content should be 4-byte little-endian integers 0 thru 19 - for i in range(dset112_size): - if i % 4 == 3: - self.assertEqual(data[i], i // 4) - else: - self.assertEqual(data[i], 0) - - # try reading last 5 bytes - params = {} - params["bucket"] = hdf5_sample_bucket - params["key"] = "data/hdf5test/tall.h5" - params["offset"] = 8287 - params["length"] = 5 - rsp = self.session.get(req, headers=req_headers, params=params) - self.assertEqual(rsp.status_code, 200) - - self.assertEqual(rsp.headers["Content-Type"], "application/octet-stream") - data = rsp.content - self.assertEqual(len(data), 5) - self.assertEqual(data, b"path\x00") - - # try reading last 5000 - params = {} - params["bucket"] = hdf5_sample_bucket - params["key"] = "data/hdf5test/tall.h5" - params["offset"] = 3292 - params["length"] = 5000 - rsp = self.session.get(req, headers=req_headers, params=params) - self.assertEqual(rsp.status_code, 200) - - self.assertEqual(rsp.headers["Content-Type"], "application/octet-stream") - data = rsp.content - self.assertEqual(len(data), 5000) - self.assertEqual(data[0:5], b"\x00\x00\x00\x00\x00") - - -if __name__ == "__main__": - # setup test files - - unittest.main()